/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/imgact.h> 36 #include <sys/imgact_aout.h> 37 #include <sys/imgact_elf.h> 38 #include <sys/kernel.h> 39 #include <sys/lock.h> 40 #include <sys/malloc.h> 41 #include <sys/module.h> 42 #include <sys/mutex.h> 43 #include <sys/proc.h> 44 #include <sys/signalvar.h> 45 #include <sys/syscallsubr.h> 46 #include <sys/sysent.h> 47 #include <sys/sysproto.h> 48 #include <sys/vnode.h> 49 50 #include <vm/vm.h> 51 #include <vm/pmap.h> 52 #include <vm/vm_extern.h> 53 #include <vm/vm_map.h> 54 #include <vm/vm_object.h> 55 #include <vm/vm_page.h> 56 #include <vm/vm_param.h> 57 58 #include <machine/cpu.h> 59 #include <machine/md_var.h> 60 #include <machine/pcb.h> 61 62 #include <i386/linux/linux.h> 63 #include <i386/linux/linux_proto.h> 64 #include <compat/linux/linux_mib.h> 65 #include <compat/linux/linux_signal.h> 66 #include <compat/linux/linux_util.h> 67 68 MODULE_VERSION(linux, 1); 69 70 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 71 72 #if BYTE_ORDER == LITTLE_ENDIAN 73 #define SHELLMAGIC 0x2123 /* #! */ 74 #else 75 #define SHELLMAGIC 0x2321 76 #endif 77 78 /* 79 * Allow the sendsig functions to use the ldebug() facility 80 * even though they are not syscalls themselves. Map them 81 * to syscall 0. This is slightly less bogus than using 82 * ldebug(sigreturn). 
83 */ 84 #define LINUX_SYS_linux_rt_sendsig 0 85 #define LINUX_SYS_linux_sendsig 0 86 87 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 88 #define __LINUX_NPXCW__ 0x37f 89 90 extern char linux_sigcode[]; 91 extern int linux_szsigcode; 92 93 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 94 95 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 96 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 97 98 static int linux_fixup(register_t **stack_base, 99 struct image_params *iparams); 100 static int elf_linux_fixup(register_t **stack_base, 101 struct image_params *iparams); 102 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 103 caddr_t *params); 104 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 105 static void exec_linux_setregs(struct thread *td, u_long entry, 106 u_long stack, u_long ps_strings); 107 108 /* 109 * Linux syscalls return negative errno's, we do positive and map them 110 */ 111 static int bsd_to_linux_errno[ELAST + 1] = { 112 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 113 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 114 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 115 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 116 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 117 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 118 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 119 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 120 -6, -6, -43, -42, -75, -6, -84 121 }; 122 123 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 124 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 125 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 126 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 127 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 128 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 129 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 130 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 
131 0, LINUX_SIGUSR1, LINUX_SIGUSR2 132 }; 133 134 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 135 SIGHUP, SIGINT, SIGQUIT, SIGILL, 136 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 137 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 138 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 139 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 140 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 141 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 142 SIGIO, SIGURG, SIGSYS 143 }; 144 145 #define LINUX_T_UNKNOWN 255 146 static int _bsd_to_linux_trapcode[] = { 147 LINUX_T_UNKNOWN, /* 0 */ 148 6, /* 1 T_PRIVINFLT */ 149 LINUX_T_UNKNOWN, /* 2 */ 150 3, /* 3 T_BPTFLT */ 151 LINUX_T_UNKNOWN, /* 4 */ 152 LINUX_T_UNKNOWN, /* 5 */ 153 16, /* 6 T_ARITHTRAP */ 154 254, /* 7 T_ASTFLT */ 155 LINUX_T_UNKNOWN, /* 8 */ 156 13, /* 9 T_PROTFLT */ 157 1, /* 10 T_TRCTRAP */ 158 LINUX_T_UNKNOWN, /* 11 */ 159 14, /* 12 T_PAGEFLT */ 160 LINUX_T_UNKNOWN, /* 13 */ 161 17, /* 14 T_ALIGNFLT */ 162 LINUX_T_UNKNOWN, /* 15 */ 163 LINUX_T_UNKNOWN, /* 16 */ 164 LINUX_T_UNKNOWN, /* 17 */ 165 0, /* 18 T_DIVIDE */ 166 2, /* 19 T_NMI */ 167 4, /* 20 T_OFLOW */ 168 5, /* 21 T_BOUND */ 169 7, /* 22 T_DNA */ 170 8, /* 23 T_DOUBLEFLT */ 171 9, /* 24 T_FPOPFLT */ 172 10, /* 25 T_TSSFLT */ 173 11, /* 26 T_SEGNPFLT */ 174 12, /* 27 T_STKFLT */ 175 18, /* 28 T_MCHK */ 176 19, /* 29 T_XMMFLT */ 177 15 /* 30 T_RESERVED */ 178 }; 179 #define bsd_to_linux_trapcode(code) \ 180 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 181 _bsd_to_linux_trapcode[(code)]: \ 182 LINUX_T_UNKNOWN) 183 184 /* 185 * If FreeBSD & Linux have a difference of opinion about what a trap 186 * means, deal with it here. 
187 * 188 * MPSAFE 189 */ 190 static int 191 translate_traps(int signal, int trap_code) 192 { 193 if (signal != SIGBUS) 194 return signal; 195 switch (trap_code) { 196 case T_PROTFLT: 197 case T_TSSFLT: 198 case T_DOUBLEFLT: 199 case T_PAGEFLT: 200 return SIGSEGV; 201 default: 202 return signal; 203 } 204 } 205 206 static int 207 linux_fixup(register_t **stack_base, struct image_params *imgp) 208 { 209 register_t *argv, *envp; 210 211 argv = *stack_base; 212 envp = *stack_base + (imgp->args->argc + 1); 213 (*stack_base)--; 214 **stack_base = (intptr_t)(void *)envp; 215 (*stack_base)--; 216 **stack_base = (intptr_t)(void *)argv; 217 (*stack_base)--; 218 **stack_base = imgp->args->argc; 219 return 0; 220 } 221 222 static int 223 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 224 { 225 Elf32_Auxargs *args; 226 register_t *pos; 227 228 KASSERT(curthread->td_proc == imgp->proc && 229 (curthread->td_proc->p_flag & P_SA) == 0, 230 ("unsafe elf_linux_fixup(), should be curproc")); 231 args = (Elf32_Auxargs *)imgp->auxargs; 232 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 233 234 if (args->trace) 235 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 236 if (args->execfd != -1) 237 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 238 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 239 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 240 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 241 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 242 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 243 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 244 AUXARGS_ENTRY(pos, AT_BASE, args->base); 245 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 246 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 247 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 248 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 249 AUXARGS_ENTRY(pos, AT_NULL, 0); 250 251 free(imgp->auxargs, M_TEMP); 252 imgp->auxargs = NULL; 253 254 (*stack_base)--; 255 **stack_base = 
(register_t)imgp->args->argc; 256 return 0; 257 } 258 259 extern int _ucodesel, _udatasel; 260 extern unsigned long linux_sznonrtsigcode; 261 262 static void 263 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 264 { 265 struct thread *td = curthread; 266 struct proc *p = td->td_proc; 267 struct sigacts *psp; 268 struct trapframe *regs; 269 struct l_rt_sigframe *fp, frame; 270 int sig, code; 271 int oonstack; 272 273 sig = ksi->ksi_signo; 274 code = ksi->ksi_code; 275 PROC_LOCK_ASSERT(p, MA_OWNED); 276 psp = p->p_sigacts; 277 mtx_assert(&psp->ps_mtx, MA_OWNED); 278 regs = td->td_frame; 279 oonstack = sigonstack(regs->tf_esp); 280 281 #ifdef DEBUG 282 if (ldebug(rt_sendsig)) 283 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 284 catcher, sig, (void*)mask, code); 285 #endif 286 /* 287 * Allocate space for the signal handler context. 288 */ 289 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 290 SIGISMEMBER(psp->ps_sigonstack, sig)) { 291 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 292 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 293 } else 294 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 295 mtx_unlock(&psp->ps_mtx); 296 297 /* 298 * Build the argument list for the signal handler. 299 */ 300 if (p->p_sysent->sv_sigtbl) 301 if (sig <= p->p_sysent->sv_sigsize) 302 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 303 304 bzero(&frame, sizeof(frame)); 305 306 frame.sf_handler = catcher; 307 frame.sf_sig = sig; 308 frame.sf_siginfo = &fp->sf_si; 309 frame.sf_ucontext = &fp->sf_sc; 310 311 /* Fill in POSIX parts */ 312 frame.sf_si.lsi_signo = sig; 313 frame.sf_si.lsi_code = code; 314 frame.sf_si.lsi_addr = ksi->ksi_addr; 315 316 /* 317 * Build the signal context to be used by sigreturn. 318 */ 319 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 320 frame.sf_sc.uc_link = NULL; /* XXX ??? 
*/ 321 322 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 323 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 324 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 325 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 326 PROC_UNLOCK(p); 327 328 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 329 330 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 331 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 332 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 333 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 334 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 335 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 336 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 337 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 338 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 339 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 340 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 341 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 342 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 343 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 344 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 345 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 346 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 347 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 348 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 349 350 #ifdef DEBUG 351 if (ldebug(rt_sendsig)) 352 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 353 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 354 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 355 #endif 356 357 if (copyout(&frame, fp, sizeof(frame)) != 0) { 358 /* 359 * Process has trashed its stack; give it an illegal 360 * instruction to halt it in its tracks. 361 */ 362 #ifdef DEBUG 363 if (ldebug(rt_sendsig)) 364 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 365 fp, oonstack); 366 #endif 367 PROC_LOCK(p); 368 sigexit(td, SIGILL); 369 } 370 371 /* 372 * Build context to run handler in. 
373 */ 374 regs->tf_esp = (int)fp; 375 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 376 linux_sznonrtsigcode; 377 regs->tf_eflags &= ~(PSL_T | PSL_VM); 378 regs->tf_cs = _ucodesel; 379 regs->tf_ds = _udatasel; 380 regs->tf_es = _udatasel; 381 regs->tf_fs = _udatasel; 382 regs->tf_ss = _udatasel; 383 PROC_LOCK(p); 384 mtx_lock(&psp->ps_mtx); 385 } 386 387 388 /* 389 * Send an interrupt to process. 390 * 391 * Stack is set up to allow sigcode stored 392 * in u. to call routine, followed by kcall 393 * to sigreturn routine below. After sigreturn 394 * resets the signal mask, the stack, and the 395 * frame pointer, it returns to the user 396 * specified pc, psl. 397 */ 398 static void 399 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 400 { 401 struct thread *td = curthread; 402 struct proc *p = td->td_proc; 403 struct sigacts *psp; 404 struct trapframe *regs; 405 struct l_sigframe *fp, frame; 406 l_sigset_t lmask; 407 int sig, code; 408 int oonstack, i; 409 410 PROC_LOCK_ASSERT(p, MA_OWNED); 411 psp = p->p_sigacts; 412 sig = ksi->ksi_signo; 413 code = ksi->ksi_code; 414 mtx_assert(&psp->ps_mtx, MA_OWNED); 415 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 416 /* Signal handler installed with SA_SIGINFO. */ 417 linux_rt_sendsig(catcher, ksi, mask); 418 return; 419 } 420 regs = td->td_frame; 421 oonstack = sigonstack(regs->tf_esp); 422 423 #ifdef DEBUG 424 if (ldebug(sendsig)) 425 printf(ARGS(sendsig, "%p, %d, %p, %u"), 426 catcher, sig, (void*)mask, code); 427 #endif 428 429 /* 430 * Allocate space for the signal handler context. 431 */ 432 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 433 SIGISMEMBER(psp->ps_sigonstack, sig)) { 434 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 435 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 436 } else 437 fp = (struct l_sigframe *)regs->tf_esp - 1; 438 mtx_unlock(&psp->ps_mtx); 439 PROC_UNLOCK(p); 440 441 /* 442 * Build the argument list for the signal handler. 
443 */ 444 if (p->p_sysent->sv_sigtbl) 445 if (sig <= p->p_sysent->sv_sigsize) 446 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 447 448 bzero(&frame, sizeof(frame)); 449 450 frame.sf_handler = catcher; 451 frame.sf_sig = sig; 452 453 bsd_to_linux_sigset(mask, &lmask); 454 455 /* 456 * Build the signal context to be used by sigreturn. 457 */ 458 frame.sf_sc.sc_mask = lmask.__bits[0]; 459 frame.sf_sc.sc_gs = rgs(); 460 frame.sf_sc.sc_fs = regs->tf_fs; 461 frame.sf_sc.sc_es = regs->tf_es; 462 frame.sf_sc.sc_ds = regs->tf_ds; 463 frame.sf_sc.sc_edi = regs->tf_edi; 464 frame.sf_sc.sc_esi = regs->tf_esi; 465 frame.sf_sc.sc_ebp = regs->tf_ebp; 466 frame.sf_sc.sc_ebx = regs->tf_ebx; 467 frame.sf_sc.sc_edx = regs->tf_edx; 468 frame.sf_sc.sc_ecx = regs->tf_ecx; 469 frame.sf_sc.sc_eax = regs->tf_eax; 470 frame.sf_sc.sc_eip = regs->tf_eip; 471 frame.sf_sc.sc_cs = regs->tf_cs; 472 frame.sf_sc.sc_eflags = regs->tf_eflags; 473 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 474 frame.sf_sc.sc_ss = regs->tf_ss; 475 frame.sf_sc.sc_err = regs->tf_err; 476 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 477 478 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 479 frame.sf_extramask[i] = lmask.__bits[i+1]; 480 481 if (copyout(&frame, fp, sizeof(frame)) != 0) { 482 /* 483 * Process has trashed its stack; give it an illegal 484 * instruction to halt it in its tracks. 485 */ 486 PROC_LOCK(p); 487 sigexit(td, SIGILL); 488 } 489 490 /* 491 * Build context to run handler in. 492 */ 493 regs->tf_esp = (int)fp; 494 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 495 regs->tf_eflags &= ~(PSL_T | PSL_VM); 496 regs->tf_cs = _ucodesel; 497 regs->tf_ds = _udatasel; 498 regs->tf_es = _udatasel; 499 regs->tf_fs = _udatasel; 500 regs->tf_ss = _udatasel; 501 PROC_LOCK(p); 502 mtx_lock(&psp->ps_mtx); 503 } 504 505 /* 506 * System call to cleanup state after a signal 507 * has been taken. Reset signal mask and 508 * stack state from context left by sendsig (above). 
509 * Return to previous pc and psl as specified by 510 * context left by sendsig. Check carefully to 511 * make sure that the user has not modified the 512 * psl to gain improper privileges or to cause 513 * a machine fault. 514 */ 515 int 516 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 517 { 518 struct proc *p = td->td_proc; 519 struct l_sigframe frame; 520 struct trapframe *regs; 521 l_sigset_t lmask; 522 int eflags, i; 523 ksiginfo_t ksi; 524 525 regs = td->td_frame; 526 527 #ifdef DEBUG 528 if (ldebug(sigreturn)) 529 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 530 #endif 531 /* 532 * The trampoline code hands us the sigframe. 533 * It is unsafe to keep track of it ourselves, in the event that a 534 * program jumps out of a signal handler. 535 */ 536 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 537 return (EFAULT); 538 539 /* 540 * Check for security violations. 541 */ 542 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 543 eflags = frame.sf_sc.sc_eflags; 544 /* 545 * XXX do allow users to change the privileged flag PSL_RF. The 546 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 547 * sometimes set it there too. tf_eflags is kept in the signal 548 * context during signal handling and there is no other place 549 * to remember it, so the PSL_RF bit may be corrupted by the 550 * signal handler without us knowing. Corruption of the PSL_RF 551 * bit at worst causes one more or one less debugger trap, so 552 * allowing it is fairly harmless. 553 */ 554 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 555 return(EINVAL); 556 557 /* 558 * Don't allow users to load a valid privileged %cs. Let the 559 * hardware check for invalid selectors, excess privilege in 560 * other selectors, invalid %eip's and invalid %esp's. 
561 */ 562 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 563 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 564 ksiginfo_init_trap(&ksi); 565 ksi.ksi_signo = SIGBUS; 566 ksi.ksi_code = BUS_OBJERR; 567 ksi.ksi_trapno = T_PROTFLT; 568 ksi.ksi_addr = (void *)regs->tf_eip; 569 trapsignal(td, &ksi); 570 return(EINVAL); 571 } 572 573 lmask.__bits[0] = frame.sf_sc.sc_mask; 574 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 575 lmask.__bits[i+1] = frame.sf_extramask[i]; 576 PROC_LOCK(p); 577 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 578 SIG_CANTMASK(td->td_sigmask); 579 signotify(td); 580 PROC_UNLOCK(p); 581 582 /* 583 * Restore signal context. 584 */ 585 /* %gs was restored by the trampoline. */ 586 regs->tf_fs = frame.sf_sc.sc_fs; 587 regs->tf_es = frame.sf_sc.sc_es; 588 regs->tf_ds = frame.sf_sc.sc_ds; 589 regs->tf_edi = frame.sf_sc.sc_edi; 590 regs->tf_esi = frame.sf_sc.sc_esi; 591 regs->tf_ebp = frame.sf_sc.sc_ebp; 592 regs->tf_ebx = frame.sf_sc.sc_ebx; 593 regs->tf_edx = frame.sf_sc.sc_edx; 594 regs->tf_ecx = frame.sf_sc.sc_ecx; 595 regs->tf_eax = frame.sf_sc.sc_eax; 596 regs->tf_eip = frame.sf_sc.sc_eip; 597 regs->tf_cs = frame.sf_sc.sc_cs; 598 regs->tf_eflags = eflags; 599 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 600 regs->tf_ss = frame.sf_sc.sc_ss; 601 602 return (EJUSTRETURN); 603 } 604 605 /* 606 * System call to cleanup state after a signal 607 * has been taken. Reset signal mask and 608 * stack state from context left by rt_sendsig (above). 609 * Return to previous pc and psl as specified by 610 * context left by sendsig. Check carefully to 611 * make sure that the user has not modified the 612 * psl to gain improper privileges or to cause 613 * a machine fault. 
614 */ 615 int 616 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 617 { 618 struct proc *p = td->td_proc; 619 struct l_ucontext uc; 620 struct l_sigcontext *context; 621 l_stack_t *lss; 622 stack_t ss; 623 struct trapframe *regs; 624 int eflags; 625 ksiginfo_t ksi; 626 627 regs = td->td_frame; 628 629 #ifdef DEBUG 630 if (ldebug(rt_sigreturn)) 631 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 632 #endif 633 /* 634 * The trampoline code hands us the ucontext. 635 * It is unsafe to keep track of it ourselves, in the event that a 636 * program jumps out of a signal handler. 637 */ 638 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 639 return (EFAULT); 640 641 context = &uc.uc_mcontext; 642 643 /* 644 * Check for security violations. 645 */ 646 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 647 eflags = context->sc_eflags; 648 /* 649 * XXX do allow users to change the privileged flag PSL_RF. The 650 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 651 * sometimes set it there too. tf_eflags is kept in the signal 652 * context during signal handling and there is no other place 653 * to remember it, so the PSL_RF bit may be corrupted by the 654 * signal handler without us knowing. Corruption of the PSL_RF 655 * bit at worst causes one more or one less debugger trap, so 656 * allowing it is fairly harmless. 657 */ 658 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 659 return(EINVAL); 660 661 /* 662 * Don't allow users to load a valid privileged %cs. Let the 663 * hardware check for invalid selectors, excess privilege in 664 * other selectors, invalid %eip's and invalid %esp's. 
665 */ 666 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 667 if (!CS_SECURE(context->sc_cs)) { 668 ksiginfo_init_trap(&ksi); 669 ksi.ksi_signo = SIGBUS; 670 ksi.ksi_code = BUS_OBJERR; 671 ksi.ksi_trapno = T_PROTFLT; 672 ksi.ksi_addr = (void *)regs->tf_eip; 673 trapsignal(td, &ksi); 674 return(EINVAL); 675 } 676 677 PROC_LOCK(p); 678 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 679 SIG_CANTMASK(td->td_sigmask); 680 signotify(td); 681 PROC_UNLOCK(p); 682 683 /* 684 * Restore signal context 685 */ 686 /* %gs was restored by the trampoline. */ 687 regs->tf_fs = context->sc_fs; 688 regs->tf_es = context->sc_es; 689 regs->tf_ds = context->sc_ds; 690 regs->tf_edi = context->sc_edi; 691 regs->tf_esi = context->sc_esi; 692 regs->tf_ebp = context->sc_ebp; 693 regs->tf_ebx = context->sc_ebx; 694 regs->tf_edx = context->sc_edx; 695 regs->tf_ecx = context->sc_ecx; 696 regs->tf_eax = context->sc_eax; 697 regs->tf_eip = context->sc_eip; 698 regs->tf_cs = context->sc_cs; 699 regs->tf_eflags = eflags; 700 regs->tf_esp = context->sc_esp_at_signal; 701 regs->tf_ss = context->sc_ss; 702 703 /* 704 * call sigaltstack & ignore results.. 
705 */ 706 lss = &uc.uc_stack; 707 ss.ss_sp = lss->ss_sp; 708 ss.ss_size = lss->ss_size; 709 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 710 711 #ifdef DEBUG 712 if (ldebug(rt_sigreturn)) 713 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 714 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 715 #endif 716 (void)kern_sigaltstack(td, &ss, NULL); 717 718 return (EJUSTRETURN); 719 } 720 721 /* 722 * MPSAFE 723 */ 724 static void 725 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 726 { 727 args[0] = tf->tf_ebx; 728 args[1] = tf->tf_ecx; 729 args[2] = tf->tf_edx; 730 args[3] = tf->tf_esi; 731 args[4] = tf->tf_edi; 732 args[5] = tf->tf_ebp; /* Unconfirmed */ 733 *params = NULL; /* no copyin */ 734 } 735 736 /* 737 * If a linux binary is exec'ing something, try this image activator 738 * first. We override standard shell script execution in order to 739 * be able to modify the interpreter path. We only do this if a linux 740 * binary is doing the exec, so we do not create an EXEC module for it. 741 */ 742 static int exec_linux_imgact_try(struct image_params *iparams); 743 744 static int 745 exec_linux_imgact_try(struct image_params *imgp) 746 { 747 const char *head = (const char *)imgp->image_header; 748 char *rpath; 749 int error = -1, len; 750 751 /* 752 * The interpreter for shell scripts run from a linux binary needs 753 * to be located in /compat/linux if possible in order to recursively 754 * maintain linux path emulation. 755 */ 756 if (((const short *)head)[0] == SHELLMAGIC) { 757 /* 758 * Run our normal shell image activator. If it succeeds attempt 759 * to use the alternate path for the interpreter. If an alternate 760 * path is found, use our stringspace to store it. 
761 */ 762 if ((error = exec_shell_imgact(imgp)) == 0) { 763 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 764 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 765 if (rpath != NULL) { 766 len = strlen(rpath) + 1; 767 768 if (len <= MAXSHELLCMDLEN) { 769 memcpy(imgp->interpreter_name, rpath, len); 770 } 771 free(rpath, M_TEMP); 772 } 773 } 774 } 775 return(error); 776 } 777 778 /* 779 * exec_setregs may initialize some registers differently than Linux 780 * does, thus potentially confusing Linux binaries. If necessary, we 781 * override the exec_setregs default(s) here. 782 */ 783 static void 784 exec_linux_setregs(struct thread *td, u_long entry, 785 u_long stack, u_long ps_strings) 786 { 787 static const u_short control = __LINUX_NPXCW__; 788 struct pcb *pcb = td->td_pcb; 789 790 exec_setregs(td, entry, stack, ps_strings); 791 792 /* Linux sets %gs to 0, we default to _udatasel */ 793 pcb->pcb_gs = 0; load_gs(0); 794 795 /* Linux sets the i387 to extended precision. */ 796 fldcw(&control); 797 } 798 799 struct sysentvec linux_sysvec = { 800 LINUX_SYS_MAXSYSCALL, 801 linux_sysent, 802 0xff, 803 LINUX_SIGTBLSZ, 804 bsd_to_linux_signal, 805 ELAST + 1, 806 bsd_to_linux_errno, 807 translate_traps, 808 linux_fixup, 809 linux_sendsig, 810 linux_sigcode, 811 &linux_szsigcode, 812 linux_prepsyscall, 813 "Linux a.out", 814 NULL, 815 exec_linux_imgact_try, 816 LINUX_MINSIGSTKSZ, 817 PAGE_SIZE, 818 VM_MIN_ADDRESS, 819 VM_MAXUSER_ADDRESS, 820 USRSTACK, 821 PS_STRINGS, 822 VM_PROT_ALL, 823 exec_copyout_strings, 824 exec_linux_setregs, 825 NULL 826 }; 827 828 struct sysentvec elf_linux_sysvec = { 829 LINUX_SYS_MAXSYSCALL, 830 linux_sysent, 831 0xff, 832 LINUX_SIGTBLSZ, 833 bsd_to_linux_signal, 834 ELAST + 1, 835 bsd_to_linux_errno, 836 translate_traps, 837 elf_linux_fixup, 838 linux_sendsig, 839 linux_sigcode, 840 &linux_szsigcode, 841 linux_prepsyscall, 842 "Linux ELF", 843 elf32_coredump, 844 exec_linux_imgact_try, 845 LINUX_MINSIGSTKSZ, 846 PAGE_SIZE, 847 
VM_MIN_ADDRESS, 848 VM_MAXUSER_ADDRESS, 849 USRSTACK, 850 PS_STRINGS, 851 VM_PROT_ALL, 852 exec_copyout_strings, 853 exec_linux_setregs, 854 NULL 855 }; 856 857 static Elf32_Brandinfo linux_brand = { 858 ELFOSABI_LINUX, 859 EM_386, 860 "Linux", 861 "/compat/linux", 862 "/lib/ld-linux.so.1", 863 &elf_linux_sysvec, 864 NULL, 865 BI_CAN_EXEC_DYN, 866 }; 867 868 static Elf32_Brandinfo linux_glibc2brand = { 869 ELFOSABI_LINUX, 870 EM_386, 871 "Linux", 872 "/compat/linux", 873 "/lib/ld-linux.so.2", 874 &elf_linux_sysvec, 875 NULL, 876 BI_CAN_EXEC_DYN, 877 }; 878 879 Elf32_Brandinfo *linux_brandlist[] = { 880 &linux_brand, 881 &linux_glibc2brand, 882 NULL 883 }; 884 885 static int 886 linux_elf_modevent(module_t mod, int type, void *data) 887 { 888 Elf32_Brandinfo **brandinfo; 889 int error; 890 struct linux_ioctl_handler **lihp; 891 struct linux_device_handler **ldhp; 892 893 error = 0; 894 895 switch(type) { 896 case MOD_LOAD: 897 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 898 ++brandinfo) 899 if (elf32_insert_brand_entry(*brandinfo) < 0) 900 error = EINVAL; 901 if (error == 0) { 902 SET_FOREACH(lihp, linux_ioctl_handler_set) 903 linux_ioctl_register_handler(*lihp); 904 SET_FOREACH(ldhp, linux_device_handler_set) 905 linux_device_register_handler(*ldhp); 906 if (bootverbose) 907 printf("Linux ELF exec handler installed\n"); 908 } else 909 printf("cannot insert Linux ELF brand handler\n"); 910 break; 911 case MOD_UNLOAD: 912 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 913 ++brandinfo) 914 if (elf32_brand_inuse(*brandinfo)) 915 error = EBUSY; 916 if (error == 0) { 917 for (brandinfo = &linux_brandlist[0]; 918 *brandinfo != NULL; ++brandinfo) 919 if (elf32_remove_brand_entry(*brandinfo) < 0) 920 error = EINVAL; 921 } 922 if (error == 0) { 923 SET_FOREACH(lihp, linux_ioctl_handler_set) 924 linux_ioctl_unregister_handler(*lihp); 925 SET_FOREACH(ldhp, linux_device_handler_set) 926 linux_device_unregister_handler(*ldhp); 927 if (bootverbose) 928 
printf("Linux ELF exec handler removed\n"); 929 } else 930 printf("Could not deinstall ELF interpreter entry\n"); 931 break; 932 default: 933 return EOPNOTSUPP; 934 } 935 return error; 936 } 937 938 static moduledata_t linux_elf_mod = { 939 "linuxelf", 940 linux_elf_modevent, 941 0 942 }; 943 944 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 945