1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/imgact.h> 36 #include <sys/imgact_aout.h> 37 #include <sys/imgact_elf.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/mutex.h> 43 #include <sys/proc.h> 44 #include <sys/signalvar.h> 45 #include <sys/syscallsubr.h> 46 #include <sys/sysent.h> 47 #include <sys/sysproto.h> 48 #include <sys/vnode.h> 49 50 #include <vm/vm.h> 51 #include <vm/pmap.h> 52 #include <vm/vm_extern.h> 53 #include <vm/vm_map.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_param.h> 57 58 #include <machine/cpu.h> 59 #include <machine/md_var.h> 60 #include <machine/pcb.h> 61 62 #include <i386/linux/linux.h> 63 #include <i386/linux/linux_proto.h> 64 #include <compat/linux/linux_mib.h> 65 #include <compat/linux/linux_signal.h> 66 #include <compat/linux/linux_util.h> 67 68 MODULE_VERSION(linux, 1); 69 70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 71 72 #if BYTE_ORDER == LITTLE_ENDIAN 73 #define SHELLMAGIC 0x2123 /* #! */ 74 #else 75 #define SHELLMAGIC 0x2321 76 #endif 77 78 /* 79 * Allow the sendsig functions to use the ldebug() facility 80 * even though they are not syscalls themselves. Map them 81 * to syscall 0. This is slightly less bogus than using 82 * ldebug(sigreturn). 83 */ 84 #define LINUX_SYS_linux_rt_sendsig 0 85 #define LINUX_SYS_linux_sendsig 0 86 87 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 88 #define __LINUX_NPXCW__ 0x37f 89 90 extern char linux_sigcode[]; 91 extern int linux_szsigcode; 92 93 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 94 95 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 96 97 static int linux_fixup(register_t **stack_base, 98 struct image_params *iparams); 99 static int elf_linux_fixup(register_t **stack_base, 100 struct image_params *iparams); 101 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 102 caddr_t *params); 103 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 104 static void exec_linux_setregs(struct thread *td, u_long entry, 105 u_long stack, u_long ps_strings); 106 107 /* 108 * Linux syscalls return negative errno's, we do positive and map them 109 */ 110 static int bsd_to_linux_errno[ELAST + 1] = { 111 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 112 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 113 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 114 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 115 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 116 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 117 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 118 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 119 -6, -6, -43, -42, -75, -6, -84 120 }; 121 122 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 123 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 124 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 125 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 126 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 127 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 128 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 129 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 130 0, LINUX_SIGUSR1, LINUX_SIGUSR2 131 }; 132 133 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 134 SIGHUP, SIGINT, SIGQUIT, SIGILL, 135 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 136 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 137 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 138 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 139 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 140 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 141 SIGIO, SIGURG, SIGSYS 142 }; 143 144 #define LINUX_T_UNKNOWN 255 145 static int _bsd_to_linux_trapcode[] = { 146 LINUX_T_UNKNOWN, /* 0 */ 147 6, /* 1 T_PRIVINFLT */ 148 LINUX_T_UNKNOWN, /* 2 */ 149 3, /* 3 T_BPTFLT */ 150 LINUX_T_UNKNOWN, /* 4 */ 151 LINUX_T_UNKNOWN, /* 5 */ 152 16, /* 6 T_ARITHTRAP */ 153 254, /* 7 T_ASTFLT */ 154 LINUX_T_UNKNOWN, /* 8 */ 155 13, /* 9 T_PROTFLT */ 156 1, /* 10 T_TRCTRAP */ 157 LINUX_T_UNKNOWN, /* 11 */ 158 14, /* 12 T_PAGEFLT */ 159 LINUX_T_UNKNOWN, /* 13 */ 160 17, /* 14 T_ALIGNFLT */ 161 LINUX_T_UNKNOWN, /* 15 */ 162 LINUX_T_UNKNOWN, /* 16 */ 163 LINUX_T_UNKNOWN, /* 17 */ 164 0, /* 18 T_DIVIDE */ 165 2, /* 19 T_NMI */ 166 4, /* 20 T_OFLOW */ 167 5, /* 21 T_BOUND */ 168 7, /* 22 T_DNA */ 169 8, /* 23 T_DOUBLEFLT */ 170 9, /* 24 T_FPOPFLT */ 171 10, /* 25 T_TSSFLT */ 172 11, /* 26 T_SEGNPFLT */ 173 12, /* 27 T_STKFLT */ 174 18, /* 28 T_MCHK */ 175 19, /* 29 T_XMMFLT */ 176 15 /* 30 T_RESERVED */ 177 }; 178 #define bsd_to_linux_trapcode(code) \ 179 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 180 _bsd_to_linux_trapcode[(code)]: \ 181 LINUX_T_UNKNOWN) 182 183 /* 184 * If FreeBSD & Linux have a difference of opinion about what a trap 185 * means, deal with it here. 186 * 187 * MPSAFE 188 */ 189 static int 190 translate_traps(int signal, int trap_code) 191 { 192 if (signal != SIGBUS) 193 return signal; 194 switch (trap_code) { 195 case T_PROTFLT: 196 case T_TSSFLT: 197 case T_DOUBLEFLT: 198 case T_PAGEFLT: 199 return SIGSEGV; 200 default: 201 return signal; 202 } 203 } 204 205 static int 206 linux_fixup(register_t **stack_base, struct image_params *imgp) 207 { 208 register_t *argv, *envp; 209 210 argv = *stack_base; 211 envp = *stack_base + (imgp->args->argc + 1); 212 (*stack_base)--; 213 **stack_base = (intptr_t)(void *)envp; 214 (*stack_base)--; 215 **stack_base = (intptr_t)(void *)argv; 216 (*stack_base)--; 217 **stack_base = imgp->args->argc; 218 return 0; 219 } 220 221 static int 222 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 223 { 224 Elf32_Auxargs *args; 225 register_t *pos; 226 227 KASSERT(curthread->td_proc == imgp->proc && 228 (curthread->td_proc->p_flag & P_SA) == 0, 229 ("unsafe elf_linux_fixup(), should be curproc")); 230 args = (Elf32_Auxargs *)imgp->auxargs; 231 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 232 233 if (args->trace) 234 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 235 if (args->execfd != -1) 236 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 237 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 238 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 239 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 240 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 241 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 242 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 243 AUXARGS_ENTRY(pos, AT_BASE, args->base); 244 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 245 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 246 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 247 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 248 AUXARGS_ENTRY(pos, AT_NULL, 0); 249 250 free(imgp->auxargs, M_TEMP); 251 imgp->auxargs = NULL; 252 253 (*stack_base)--; 254 **stack_base = (register_t)imgp->args->argc; 255 return 0; 256 } 257 258 extern int _ucodesel, _udatasel; 259 extern unsigned long linux_sznonrtsigcode; 260 261 static void 262 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 263 { 264 struct thread *td = curthread; 265 struct proc *p = td->td_proc; 266 struct sigacts *psp; 267 struct trapframe *regs; 268 struct l_rt_sigframe *fp, frame; 269 int sig, code; 270 int oonstack; 271 272 sig = ksi->ksi_signo; 273 code = ksi->ksi_code; 274 PROC_LOCK_ASSERT(p, MA_OWNED); 275 psp = p->p_sigacts; 276 mtx_assert(&psp->ps_mtx, MA_OWNED); 277 regs = td->td_frame; 278 oonstack = sigonstack(regs->tf_esp); 279 280 #ifdef DEBUG 281 if (ldebug(rt_sendsig)) 282 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 283 catcher, sig, (void*)mask, code); 284 #endif 285 /* 286 * Allocate space for the signal handler context. 287 */ 288 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 289 SIGISMEMBER(psp->ps_sigonstack, sig)) { 290 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 291 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 292 } else 293 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 294 mtx_unlock(&psp->ps_mtx); 295 296 /* 297 * Build the argument list for the signal handler. 298 */ 299 if (p->p_sysent->sv_sigtbl) 300 if (sig <= p->p_sysent->sv_sigsize) 301 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 302 303 bzero(&frame, sizeof(frame)); 304 305 frame.sf_handler = catcher; 306 frame.sf_sig = sig; 307 frame.sf_siginfo = &fp->sf_si; 308 frame.sf_ucontext = &fp->sf_sc; 309 310 /* Fill in POSIX parts */ 311 frame.sf_si.lsi_signo = sig; 312 frame.sf_si.lsi_code = code; 313 frame.sf_si.lsi_addr = ksi->ksi_addr; 314 315 /* 316 * Build the signal context to be used by sigreturn. 317 */ 318 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 319 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 320 321 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 322 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 323 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 324 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 325 PROC_UNLOCK(p); 326 327 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 328 329 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 330 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 331 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 332 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 333 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 334 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 335 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 336 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 337 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 338 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 339 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 340 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 341 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 342 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 343 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 344 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 345 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 346 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 347 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 348 349 #ifdef DEBUG 350 if (ldebug(rt_sendsig)) 351 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 352 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 353 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 354 #endif 355 356 if (copyout(&frame, fp, sizeof(frame)) != 0) { 357 /* 358 * Process has trashed its stack; give it an illegal 359 * instruction to halt it in its tracks. 360 */ 361 #ifdef DEBUG 362 if (ldebug(rt_sendsig)) 363 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 364 fp, oonstack); 365 #endif 366 PROC_LOCK(p); 367 sigexit(td, SIGILL); 368 } 369 370 /* 371 * Build context to run handler in. 372 */ 373 regs->tf_esp = (int)fp; 374 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 375 linux_sznonrtsigcode; 376 regs->tf_eflags &= ~(PSL_T | PSL_VM); 377 regs->tf_cs = _ucodesel; 378 regs->tf_ds = _udatasel; 379 regs->tf_es = _udatasel; 380 regs->tf_fs = _udatasel; 381 regs->tf_ss = _udatasel; 382 PROC_LOCK(p); 383 mtx_lock(&psp->ps_mtx); 384 } 385 386 387 /* 388 * Send an interrupt to process. 389 * 390 * Stack is set up to allow sigcode stored 391 * in u. to call routine, followed by kcall 392 * to sigreturn routine below. After sigreturn 393 * resets the signal mask, the stack, and the 394 * frame pointer, it returns to the user 395 * specified pc, psl. 396 */ 397 static void 398 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 399 { 400 struct thread *td = curthread; 401 struct proc *p = td->td_proc; 402 struct sigacts *psp; 403 struct trapframe *regs; 404 struct l_sigframe *fp, frame; 405 l_sigset_t lmask; 406 int sig, code; 407 int oonstack, i; 408 409 PROC_LOCK_ASSERT(p, MA_OWNED); 410 psp = p->p_sigacts; 411 sig = ksi->ksi_signo; 412 code = ksi->ksi_code; 413 mtx_assert(&psp->ps_mtx, MA_OWNED); 414 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 415 /* Signal handler installed with SA_SIGINFO. */ 416 linux_rt_sendsig(catcher, ksi, mask); 417 return; 418 } 419 regs = td->td_frame; 420 oonstack = sigonstack(regs->tf_esp); 421 422 #ifdef DEBUG 423 if (ldebug(sendsig)) 424 printf(ARGS(sendsig, "%p, %d, %p, %u"), 425 catcher, sig, (void*)mask, code); 426 #endif 427 428 /* 429 * Allocate space for the signal handler context. 430 */ 431 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 432 SIGISMEMBER(psp->ps_sigonstack, sig)) { 433 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 434 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 435 } else 436 fp = (struct l_sigframe *)regs->tf_esp - 1; 437 mtx_unlock(&psp->ps_mtx); 438 PROC_UNLOCK(p); 439 440 /* 441 * Build the argument list for the signal handler. 442 */ 443 if (p->p_sysent->sv_sigtbl) 444 if (sig <= p->p_sysent->sv_sigsize) 445 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 446 447 bzero(&frame, sizeof(frame)); 448 449 frame.sf_handler = catcher; 450 frame.sf_sig = sig; 451 452 bsd_to_linux_sigset(mask, &lmask); 453 454 /* 455 * Build the signal context to be used by sigreturn. 456 */ 457 frame.sf_sc.sc_mask = lmask.__bits[0]; 458 frame.sf_sc.sc_gs = rgs(); 459 frame.sf_sc.sc_fs = regs->tf_fs; 460 frame.sf_sc.sc_es = regs->tf_es; 461 frame.sf_sc.sc_ds = regs->tf_ds; 462 frame.sf_sc.sc_edi = regs->tf_edi; 463 frame.sf_sc.sc_esi = regs->tf_esi; 464 frame.sf_sc.sc_ebp = regs->tf_ebp; 465 frame.sf_sc.sc_ebx = regs->tf_ebx; 466 frame.sf_sc.sc_edx = regs->tf_edx; 467 frame.sf_sc.sc_ecx = regs->tf_ecx; 468 frame.sf_sc.sc_eax = regs->tf_eax; 469 frame.sf_sc.sc_eip = regs->tf_eip; 470 frame.sf_sc.sc_cs = regs->tf_cs; 471 frame.sf_sc.sc_eflags = regs->tf_eflags; 472 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 473 frame.sf_sc.sc_ss = regs->tf_ss; 474 frame.sf_sc.sc_err = regs->tf_err; 475 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 476 477 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 478 frame.sf_extramask[i] = lmask.__bits[i+1]; 479 480 if (copyout(&frame, fp, sizeof(frame)) != 0) { 481 /* 482 * Process has trashed its stack; give it an illegal 483 * instruction to halt it in its tracks. 484 */ 485 PROC_LOCK(p); 486 sigexit(td, SIGILL); 487 } 488 489 /* 490 * Build context to run handler in. 491 */ 492 regs->tf_esp = (int)fp; 493 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 494 regs->tf_eflags &= ~(PSL_T | PSL_VM); 495 regs->tf_cs = _ucodesel; 496 regs->tf_ds = _udatasel; 497 regs->tf_es = _udatasel; 498 regs->tf_fs = _udatasel; 499 regs->tf_ss = _udatasel; 500 PROC_LOCK(p); 501 mtx_lock(&psp->ps_mtx); 502 } 503 504 /* 505 * System call to cleanup state after a signal 506 * has been taken. Reset signal mask and 507 * stack state from context left by sendsig (above). 508 * Return to previous pc and psl as specified by 509 * context left by sendsig. Check carefully to 510 * make sure that the user has not modified the 511 * psl to gain improper privileges or to cause 512 * a machine fault. 513 */ 514 int 515 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 516 { 517 struct proc *p = td->td_proc; 518 struct l_sigframe frame; 519 struct trapframe *regs; 520 l_sigset_t lmask; 521 int eflags, i; 522 ksiginfo_t ksi; 523 524 regs = td->td_frame; 525 526 #ifdef DEBUG 527 if (ldebug(sigreturn)) 528 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 529 #endif 530 /* 531 * The trampoline code hands us the sigframe. 532 * It is unsafe to keep track of it ourselves, in the event that a 533 * program jumps out of a signal handler. 534 */ 535 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 536 return (EFAULT); 537 538 /* 539 * Check for security violations. 540 */ 541 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 542 eflags = frame.sf_sc.sc_eflags; 543 /* 544 * XXX do allow users to change the privileged flag PSL_RF. The 545 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 546 * sometimes set it there too. tf_eflags is kept in the signal 547 * context during signal handling and there is no other place 548 * to remember it, so the PSL_RF bit may be corrupted by the 549 * signal handler without us knowing. Corruption of the PSL_RF 550 * bit at worst causes one more or one less debugger trap, so 551 * allowing it is fairly harmless. 552 */ 553 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 554 return(EINVAL); 555 556 /* 557 * Don't allow users to load a valid privileged %cs. Let the 558 * hardware check for invalid selectors, excess privilege in 559 * other selectors, invalid %eip's and invalid %esp's. 560 */ 561 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 562 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 563 ksiginfo_init_trap(&ksi); 564 ksi.ksi_signo = SIGBUS; 565 ksi.ksi_code = BUS_OBJERR; 566 ksi.ksi_trapno = T_PROTFLT; 567 ksi.ksi_addr = (void *)regs->tf_eip; 568 trapsignal(td, &ksi); 569 return(EINVAL); 570 } 571 572 lmask.__bits[0] = frame.sf_sc.sc_mask; 573 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 574 lmask.__bits[i+1] = frame.sf_extramask[i]; 575 PROC_LOCK(p); 576 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 577 SIG_CANTMASK(td->td_sigmask); 578 signotify(td); 579 PROC_UNLOCK(p); 580 581 /* 582 * Restore signal context. 583 */ 584 /* %gs was restored by the trampoline. */ 585 regs->tf_fs = frame.sf_sc.sc_fs; 586 regs->tf_es = frame.sf_sc.sc_es; 587 regs->tf_ds = frame.sf_sc.sc_ds; 588 regs->tf_edi = frame.sf_sc.sc_edi; 589 regs->tf_esi = frame.sf_sc.sc_esi; 590 regs->tf_ebp = frame.sf_sc.sc_ebp; 591 regs->tf_ebx = frame.sf_sc.sc_ebx; 592 regs->tf_edx = frame.sf_sc.sc_edx; 593 regs->tf_ecx = frame.sf_sc.sc_ecx; 594 regs->tf_eax = frame.sf_sc.sc_eax; 595 regs->tf_eip = frame.sf_sc.sc_eip; 596 regs->tf_cs = frame.sf_sc.sc_cs; 597 regs->tf_eflags = eflags; 598 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 599 regs->tf_ss = frame.sf_sc.sc_ss; 600 601 return (EJUSTRETURN); 602 } 603 604 /* 605 * System call to cleanup state after a signal 606 * has been taken. Reset signal mask and 607 * stack state from context left by rt_sendsig (above). 608 * Return to previous pc and psl as specified by 609 * context left by sendsig. Check carefully to 610 * make sure that the user has not modified the 611 * psl to gain improper privileges or to cause 612 * a machine fault. 613 */ 614 int 615 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 616 { 617 struct proc *p = td->td_proc; 618 struct l_ucontext uc; 619 struct l_sigcontext *context; 620 l_stack_t *lss; 621 stack_t ss; 622 struct trapframe *regs; 623 int eflags; 624 ksiginfo_t ksi; 625 626 regs = td->td_frame; 627 628 #ifdef DEBUG 629 if (ldebug(rt_sigreturn)) 630 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 631 #endif 632 /* 633 * The trampoline code hands us the ucontext. 634 * It is unsafe to keep track of it ourselves, in the event that a 635 * program jumps out of a signal handler. 636 */ 637 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 638 return (EFAULT); 639 640 context = &uc.uc_mcontext; 641 642 /* 643 * Check for security violations. 644 */ 645 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 646 eflags = context->sc_eflags; 647 /* 648 * XXX do allow users to change the privileged flag PSL_RF. The 649 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 650 * sometimes set it there too. tf_eflags is kept in the signal 651 * context during signal handling and there is no other place 652 * to remember it, so the PSL_RF bit may be corrupted by the 653 * signal handler without us knowing. Corruption of the PSL_RF 654 * bit at worst causes one more or one less debugger trap, so 655 * allowing it is fairly harmless. 656 */ 657 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 658 return(EINVAL); 659 660 /* 661 * Don't allow users to load a valid privileged %cs. Let the 662 * hardware check for invalid selectors, excess privilege in 663 * other selectors, invalid %eip's and invalid %esp's. 664 */ 665 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 666 if (!CS_SECURE(context->sc_cs)) { 667 ksiginfo_init_trap(&ksi); 668 ksi.ksi_signo = SIGBUS; 669 ksi.ksi_code = BUS_OBJERR; 670 ksi.ksi_trapno = T_PROTFLT; 671 ksi.ksi_addr = (void *)regs->tf_eip; 672 trapsignal(td, &ksi); 673 return(EINVAL); 674 } 675 676 PROC_LOCK(p); 677 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 678 SIG_CANTMASK(td->td_sigmask); 679 signotify(td); 680 PROC_UNLOCK(p); 681 682 /* 683 * Restore signal context 684 */ 685 /* %gs was restored by the trampoline. */ 686 regs->tf_fs = context->sc_fs; 687 regs->tf_es = context->sc_es; 688 regs->tf_ds = context->sc_ds; 689 regs->tf_edi = context->sc_edi; 690 regs->tf_esi = context->sc_esi; 691 regs->tf_ebp = context->sc_ebp; 692 regs->tf_ebx = context->sc_ebx; 693 regs->tf_edx = context->sc_edx; 694 regs->tf_ecx = context->sc_ecx; 695 regs->tf_eax = context->sc_eax; 696 regs->tf_eip = context->sc_eip; 697 regs->tf_cs = context->sc_cs; 698 regs->tf_eflags = eflags; 699 regs->tf_esp = context->sc_esp_at_signal; 700 regs->tf_ss = context->sc_ss; 701 702 /* 703 * call sigaltstack & ignore results.. 704 */ 705 lss = &uc.uc_stack; 706 ss.ss_sp = lss->ss_sp; 707 ss.ss_size = lss->ss_size; 708 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 709 710 #ifdef DEBUG 711 if (ldebug(rt_sigreturn)) 712 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 713 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 714 #endif 715 (void)kern_sigaltstack(td, &ss, NULL); 716 717 return (EJUSTRETURN); 718 } 719 720 /* 721 * MPSAFE 722 */ 723 static void 724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 725 { 726 args[0] = tf->tf_ebx; 727 args[1] = tf->tf_ecx; 728 args[2] = tf->tf_edx; 729 args[3] = tf->tf_esi; 730 args[4] = tf->tf_edi; 731 args[5] = tf->tf_ebp; /* Unconfirmed */ 732 *params = NULL; /* no copyin */ 733 } 734 735 /* 736 * If a linux binary is exec'ing something, try this image activator 737 * first. We override standard shell script execution in order to 738 * be able to modify the interpreter path. We only do this if a linux 739 * binary is doing the exec, so we do not create an EXEC module for it. 740 */ 741 static int exec_linux_imgact_try(struct image_params *iparams); 742 743 static int 744 exec_linux_imgact_try(struct image_params *imgp) 745 { 746 const char *head = (const char *)imgp->image_header; 747 char *rpath; 748 int error = -1, len; 749 750 /* 751 * The interpreter for shell scripts run from a linux binary needs 752 * to be located in /compat/linux if possible in order to recursively 753 * maintain linux path emulation. 754 */ 755 if (((const short *)head)[0] == SHELLMAGIC) { 756 /* 757 * Run our normal shell image activator. If it succeeds attempt 758 * to use the alternate path for the interpreter. If an alternate 759 * path is found, use our stringspace to store it. 760 */ 761 if ((error = exec_shell_imgact(imgp)) == 0) { 762 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 763 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 764 if (rpath != NULL) { 765 len = strlen(rpath) + 1; 766 767 if (len <= MAXSHELLCMDLEN) { 768 memcpy(imgp->interpreter_name, rpath, len); 769 } 770 free(rpath, M_TEMP); 771 } 772 } 773 } 774 return(error); 775 } 776 777 /* 778 * exec_setregs may initialize some registers differently than Linux 779 * does, thus potentially confusing Linux binaries. If necessary, we 780 * override the exec_setregs default(s) here. 781 */ 782 static void 783 exec_linux_setregs(struct thread *td, u_long entry, 784 u_long stack, u_long ps_strings) 785 { 786 static const u_short control = __LINUX_NPXCW__; 787 struct pcb *pcb = td->td_pcb; 788 789 exec_setregs(td, entry, stack, ps_strings); 790 791 /* Linux sets %gs to 0, we default to _udatasel */ 792 pcb->pcb_gs = 0; load_gs(0); 793 794 /* Linux sets the i387 to extended precision. */ 795 fldcw(&control); 796 } 797 798 struct sysentvec linux_sysvec = { 799 LINUX_SYS_MAXSYSCALL, 800 linux_sysent, 801 0xff, 802 LINUX_SIGTBLSZ, 803 bsd_to_linux_signal, 804 ELAST + 1, 805 bsd_to_linux_errno, 806 translate_traps, 807 linux_fixup, 808 linux_sendsig, 809 linux_sigcode, 810 &linux_szsigcode, 811 linux_prepsyscall, 812 "Linux a.out", 813 NULL, 814 exec_linux_imgact_try, 815 LINUX_MINSIGSTKSZ, 816 PAGE_SIZE, 817 VM_MIN_ADDRESS, 818 VM_MAXUSER_ADDRESS, 819 USRSTACK, 820 PS_STRINGS, 821 VM_PROT_ALL, 822 exec_copyout_strings, 823 exec_linux_setregs, 824 NULL 825 }; 826 827 struct sysentvec elf_linux_sysvec = { 828 LINUX_SYS_MAXSYSCALL, 829 linux_sysent, 830 0xff, 831 LINUX_SIGTBLSZ, 832 bsd_to_linux_signal, 833 ELAST + 1, 834 bsd_to_linux_errno, 835 translate_traps, 836 elf_linux_fixup, 837 linux_sendsig, 838 linux_sigcode, 839 &linux_szsigcode, 840 linux_prepsyscall, 841 "Linux ELF", 842 elf32_coredump, 843 exec_linux_imgact_try, 844 LINUX_MINSIGSTKSZ, 845 PAGE_SIZE, 846 VM_MIN_ADDRESS, 847 VM_MAXUSER_ADDRESS, 848 USRSTACK, 849 PS_STRINGS, 850 VM_PROT_ALL, 851 exec_copyout_strings, 852 exec_linux_setregs, 853 NULL 854 }; 855 856 static Elf32_Brandinfo linux_brand = { 857 ELFOSABI_LINUX, 858 EM_386, 859 "Linux", 860 "/compat/linux", 861 "/lib/ld-linux.so.1", 862 &elf_linux_sysvec, 863 NULL, 864 BI_CAN_EXEC_DYN, 865 }; 866 867 static Elf32_Brandinfo linux_glibc2brand = { 868 ELFOSABI_LINUX, 869 EM_386, 870 "Linux", 871 "/compat/linux", 872 "/lib/ld-linux.so.2", 873 &elf_linux_sysvec, 874 NULL, 875 BI_CAN_EXEC_DYN, 876 }; 877 878 Elf32_Brandinfo *linux_brandlist[] = { 879 &linux_brand, 880 &linux_glibc2brand, 881 NULL 882 }; 883 884 static int 885 linux_elf_modevent(module_t mod, int type, void *data) 886 { 887 Elf32_Brandinfo **brandinfo; 888 int error; 889 struct linux_ioctl_handler **lihp; 890 891 error = 0; 892 893 switch(type) { 894 case MOD_LOAD: 895 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 896 ++brandinfo) 897 if (elf32_insert_brand_entry(*brandinfo) < 0) 898 error = EINVAL; 899 if (error == 0) { 900 SET_FOREACH(lihp, linux_ioctl_handler_set) 901 linux_ioctl_register_handler(*lihp); 902 if (bootverbose) 903 printf("Linux ELF exec handler installed\n"); 904 } else 905 printf("cannot insert Linux ELF brand handler\n"); 906 break; 907 case MOD_UNLOAD: 908 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 909 ++brandinfo) 910 if (elf32_brand_inuse(*brandinfo)) 911 error = EBUSY; 912 if (error == 0) { 913 for (brandinfo = &linux_brandlist[0]; 914 *brandinfo != NULL; ++brandinfo) 915 if (elf32_remove_brand_entry(*brandinfo) < 0) 916 error = EINVAL; 917 } 918 if (error == 0) { 919 SET_FOREACH(lihp, linux_ioctl_handler_set) 920 linux_ioctl_unregister_handler(*lihp); 921 if (bootverbose) 922 printf("Linux ELF exec handler removed\n"); 923 } else 924 printf("Could not deinstall ELF interpreter entry\n"); 925 break; 926 default: 927 return EOPNOTSUPP; 928 } 929 return error; 930 } 931 932 static moduledata_t linux_elf_mod = { 933 "linuxelf", 934 linux_elf_modevent, 935 0 936 }; 937 938 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 939