1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 /* XXX we use functions that might not exist. */ 32 #include "opt_compat.h" 33 34 #ifndef COMPAT_43 35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 36 #endif 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/imgact.h> 41 #include <sys/imgact_aout.h> 42 #include <sys/imgact_elf.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mutex.h> 46 #include <sys/proc.h> 47 #include <sys/signalvar.h> 48 #include <sys/syscallsubr.h> 49 #include <sys/sysent.h> 50 #include <sys/sysproto.h> 51 #include <sys/user.h> 52 #include <sys/vnode.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_param.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_extern.h> 58 #include <sys/exec.h> 59 #include <sys/kernel.h> 60 #include <sys/module.h> 61 #include <machine/cpu.h> 62 #include <machine/md_var.h> 63 #include <sys/mutex.h> 64 65 #include <vm/vm.h> 66 #include <vm/vm_param.h> 67 #include <vm/pmap.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_object.h> 70 71 #include <i386/linux/linux.h> 72 #include <i386/linux/linux_proto.h> 73 #include <compat/linux/linux_mib.h> 74 #include <compat/linux/linux_signal.h> 75 #include <compat/linux/linux_util.h> 76 77 MODULE_VERSION(linux, 1); 78 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 79 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 80 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 81 82 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 83 84 #if BYTE_ORDER == LITTLE_ENDIAN 85 #define SHELLMAGIC 0x2123 /* #! */ 86 #else 87 #define SHELLMAGIC 0x2321 88 #endif 89 90 /* 91 * Allow the sendsig functions to use the ldebug() facility 92 * even though they are not syscalls themselves. Map them 93 * to syscall 0. This is slightly less bogus than using 94 * ldebug(sigreturn). 95 */ 96 #define LINUX_SYS_linux_rt_sendsig 0 97 #define LINUX_SYS_linux_sendsig 0 98 99 extern char linux_sigcode[]; 100 extern int linux_szsigcode; 101 102 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 103 104 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 105 106 static int linux_fixup(register_t **stack_base, 107 struct image_params *iparams); 108 static int elf_linux_fixup(register_t **stack_base, 109 struct image_params *iparams); 110 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 111 caddr_t *params); 112 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 113 u_long code); 114 115 /* 116 * Linux syscalls return negative errno's, we do positive and map them 117 */ 118 static int bsd_to_linux_errno[ELAST + 1] = { 119 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 120 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 121 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 122 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 123 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 124 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 125 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 126 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 127 -6, -6, -43, -42, -75, -6, -84 128 }; 129 130 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 131 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 132 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 133 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 134 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 135 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 136 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 137 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 138 0, LINUX_SIGUSR1, LINUX_SIGUSR2 139 }; 140 141 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 142 SIGHUP, SIGINT, SIGQUIT, SIGILL, 143 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 144 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 145 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 146 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 147 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 148 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 149 SIGIO, SIGURG, SIGSYS 150 }; 151 152 #define LINUX_T_UNKNOWN 255 153 static int _bsd_to_linux_trapcode[] = { 154 LINUX_T_UNKNOWN, /* 0 */ 155 6, /* 1 T_PRIVINFLT */ 156 LINUX_T_UNKNOWN, /* 2 */ 157 3, /* 3 T_BPTFLT */ 158 LINUX_T_UNKNOWN, /* 4 */ 159 LINUX_T_UNKNOWN, /* 5 */ 160 16, /* 6 T_ARITHTRAP */ 161 254, /* 7 T_ASTFLT */ 162 LINUX_T_UNKNOWN, /* 8 */ 163 13, /* 9 T_PROTFLT */ 164 1, /* 10 T_TRCTRAP */ 165 LINUX_T_UNKNOWN, /* 11 */ 166 14, /* 12 T_PAGEFLT */ 167 LINUX_T_UNKNOWN, /* 13 */ 168 17, /* 14 T_ALIGNFLT */ 169 LINUX_T_UNKNOWN, /* 15 */ 170 LINUX_T_UNKNOWN, /* 16 */ 171 LINUX_T_UNKNOWN, /* 17 */ 172 0, /* 18 T_DIVIDE */ 173 2, /* 19 T_NMI */ 174 4, /* 20 T_OFLOW */ 175 5, /* 21 T_BOUND */ 176 7, /* 22 T_DNA */ 177 8, /* 23 T_DOUBLEFLT */ 178 9, /* 24 T_FPOPFLT */ 179 10, /* 25 T_TSSFLT */ 180 11, /* 26 T_SEGNPFLT */ 181 12, /* 27 T_STKFLT */ 182 18, /* 28 T_MCHK */ 183 19, /* 29 T_XMMFLT */ 184 15 /* 30 T_RESERVED */ 185 }; 186 #define bsd_to_linux_trapcode(code) \ 187 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 188 _bsd_to_linux_trapcode[(code)]: \ 189 LINUX_T_UNKNOWN) 190 191 /* 192 * If FreeBSD & Linux have a difference of opinion about what a trap 193 * means, deal with it here. 194 * 195 * MPSAFE 196 */ 197 static int 198 translate_traps(int signal, int trap_code) 199 { 200 if (signal != SIGBUS) 201 return signal; 202 switch (trap_code) { 203 case T_PROTFLT: 204 case T_TSSFLT: 205 case T_DOUBLEFLT: 206 case T_PAGEFLT: 207 return SIGSEGV; 208 default: 209 return signal; 210 } 211 } 212 213 static int 214 linux_fixup(register_t **stack_base, struct image_params *imgp) 215 { 216 register_t *argv, *envp; 217 218 argv = *stack_base; 219 envp = *stack_base + (imgp->argc + 1); 220 (*stack_base)--; 221 **stack_base = (intptr_t)(void *)envp; 222 (*stack_base)--; 223 **stack_base = (intptr_t)(void *)argv; 224 (*stack_base)--; 225 **stack_base = imgp->argc; 226 return 0; 227 } 228 229 static int 230 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 231 { 232 Elf32_Auxargs *args; 233 register_t *pos; 234 235 KASSERT(curthread->td_proc == imgp->proc && 236 (curthread->td_proc->p_flag & P_THREADED) == 0, 237 ("unsafe elf_linux_fixup(), should be curproc")); 238 args = (Elf32_Auxargs *)imgp->auxargs; 239 pos = *stack_base + (imgp->argc + imgp->envc + 2); 240 241 if (args->trace) 242 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 243 if (args->execfd != -1) 244 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 245 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 246 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 247 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 248 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 249 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 250 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 251 AUXARGS_ENTRY(pos, AT_BASE, args->base); 252 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 253 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 254 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 255 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 256 AUXARGS_ENTRY(pos, AT_NULL, 0); 257 258 free(imgp->auxargs, M_TEMP); 259 imgp->auxargs = NULL; 260 261 (*stack_base)--; 262 **stack_base = (register_t)imgp->argc; 263 return 0; 264 } 265 266 extern int _ucodesel, _udatasel; 267 extern unsigned long linux_sznonrtsigcode; 268 269 static void 270 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 271 { 272 struct thread *td = curthread; 273 struct proc *p = td->td_proc; 274 struct trapframe *regs; 275 struct l_rt_sigframe *fp, frame; 276 int oonstack; 277 278 PROC_LOCK_ASSERT(p, MA_OWNED); 279 regs = td->td_frame; 280 oonstack = sigonstack(regs->tf_esp); 281 282 #ifdef DEBUG 283 if (ldebug(rt_sendsig)) 284 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 285 catcher, sig, (void*)mask, code); 286 #endif 287 /* 288 * Allocate space for the signal handler context. 289 */ 290 if ((p->p_flag & P_ALTSTACK) && !oonstack && 291 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 292 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp + 293 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 294 } else 295 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 296 297 /* 298 * Build the argument list for the signal handler. 299 */ 300 if (p->p_sysent->sv_sigtbl) 301 if (sig <= p->p_sysent->sv_sigsize) 302 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 303 304 bzero(&frame, sizeof(frame)); 305 306 frame.sf_handler = catcher; 307 frame.sf_sig = sig; 308 frame.sf_siginfo = &fp->sf_si; 309 frame.sf_ucontext = &fp->sf_sc; 310 311 /* Fill in POSIX parts */ 312 frame.sf_si.lsi_signo = sig; 313 frame.sf_si.lsi_code = code; 314 frame.sf_si.lsi_addr = (void *)regs->tf_err; 315 316 /* 317 * Build the signal context to be used by sigreturn. 318 */ 319 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 320 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 321 322 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp; 323 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size; 324 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) 325 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 326 PROC_UNLOCK(p); 327 328 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 329 330 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 331 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 332 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 333 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 334 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 335 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 336 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 337 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 338 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 339 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 340 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 341 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 342 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 343 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 344 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 345 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 346 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 347 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 348 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 349 350 #ifdef DEBUG 351 if (ldebug(rt_sendsig)) 352 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 353 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp, 354 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 355 #endif 356 357 if (copyout(&frame, fp, sizeof(frame)) != 0) { 358 /* 359 * Process has trashed its stack; give it an illegal 360 * instruction to halt it in its tracks. 361 */ 362 #ifdef DEBUG 363 if (ldebug(rt_sendsig)) 364 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 365 fp, oonstack); 366 #endif 367 PROC_LOCK(p); 368 sigexit(td, SIGILL); 369 } 370 371 /* 372 * Build context to run handler in. 373 */ 374 regs->tf_esp = (int)fp; 375 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 376 linux_sznonrtsigcode; 377 regs->tf_eflags &= ~(PSL_T | PSL_VM); 378 regs->tf_cs = _ucodesel; 379 regs->tf_ds = _udatasel; 380 regs->tf_es = _udatasel; 381 regs->tf_fs = _udatasel; 382 regs->tf_ss = _udatasel; 383 PROC_LOCK(p); 384 } 385 386 387 /* 388 * Send an interrupt to process. 389 * 390 * Stack is set up to allow sigcode stored 391 * in u. to call routine, followed by kcall 392 * to sigreturn routine below. After sigreturn 393 * resets the signal mask, the stack, and the 394 * frame pointer, it returns to the user 395 * specified pc, psl. 396 */ 397 static void 398 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 399 { 400 struct thread *td = curthread; 401 struct proc *p = td->td_proc; 402 struct trapframe *regs; 403 struct l_sigframe *fp, frame; 404 l_sigset_t lmask; 405 int oonstack, i; 406 407 PROC_LOCK_ASSERT(p, MA_OWNED); 408 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { 409 /* Signal handler installed with SA_SIGINFO. */ 410 linux_rt_sendsig(catcher, sig, mask, code); 411 return; 412 } 413 414 regs = td->td_frame; 415 oonstack = sigonstack(regs->tf_esp); 416 417 #ifdef DEBUG 418 if (ldebug(sendsig)) 419 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 420 catcher, sig, (void*)mask, code); 421 #endif 422 423 /* 424 * Allocate space for the signal handler context. 425 */ 426 if ((p->p_flag & P_ALTSTACK) && !oonstack && 427 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 428 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp + 429 p->p_sigstk.ss_size - sizeof(struct l_sigframe)); 430 } else 431 fp = (struct l_sigframe *)regs->tf_esp - 1; 432 PROC_UNLOCK(p); 433 434 /* 435 * Build the argument list for the signal handler. 436 */ 437 if (p->p_sysent->sv_sigtbl) 438 if (sig <= p->p_sysent->sv_sigsize) 439 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 440 441 bzero(&frame, sizeof(frame)); 442 443 frame.sf_handler = catcher; 444 frame.sf_sig = sig; 445 446 bsd_to_linux_sigset(mask, &lmask); 447 448 /* 449 * Build the signal context to be used by sigreturn. 450 */ 451 frame.sf_sc.sc_mask = lmask.__bits[0]; 452 frame.sf_sc.sc_gs = rgs(); 453 frame.sf_sc.sc_fs = regs->tf_fs; 454 frame.sf_sc.sc_es = regs->tf_es; 455 frame.sf_sc.sc_ds = regs->tf_ds; 456 frame.sf_sc.sc_edi = regs->tf_edi; 457 frame.sf_sc.sc_esi = regs->tf_esi; 458 frame.sf_sc.sc_ebp = regs->tf_ebp; 459 frame.sf_sc.sc_ebx = regs->tf_ebx; 460 frame.sf_sc.sc_edx = regs->tf_edx; 461 frame.sf_sc.sc_ecx = regs->tf_ecx; 462 frame.sf_sc.sc_eax = regs->tf_eax; 463 frame.sf_sc.sc_eip = regs->tf_eip; 464 frame.sf_sc.sc_cs = regs->tf_cs; 465 frame.sf_sc.sc_eflags = regs->tf_eflags; 466 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 467 frame.sf_sc.sc_ss = regs->tf_ss; 468 frame.sf_sc.sc_err = regs->tf_err; 469 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 470 471 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 472 frame.sf_extramask[i] = lmask.__bits[i+1]; 473 474 if (copyout(&frame, fp, sizeof(frame)) != 0) { 475 /* 476 * Process has trashed its stack; give it an illegal 477 * instruction to halt it in its tracks. 478 */ 479 PROC_LOCK(p); 480 sigexit(td, SIGILL); 481 } 482 483 /* 484 * Build context to run handler in. 485 */ 486 regs->tf_esp = (int)fp; 487 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 488 regs->tf_eflags &= ~(PSL_T | PSL_VM); 489 regs->tf_cs = _ucodesel; 490 regs->tf_ds = _udatasel; 491 regs->tf_es = _udatasel; 492 regs->tf_fs = _udatasel; 493 regs->tf_ss = _udatasel; 494 PROC_LOCK(p); 495 } 496 497 /* 498 * System call to cleanup state after a signal 499 * has been taken. Reset signal mask and 500 * stack state from context left by sendsig (above). 501 * Return to previous pc and psl as specified by 502 * context left by sendsig. Check carefully to 503 * make sure that the user has not modified the 504 * psl to gain improper privileges or to cause 505 * a machine fault. 506 */ 507 int 508 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 509 { 510 struct proc *p = td->td_proc; 511 struct l_sigframe frame; 512 struct trapframe *regs; 513 l_sigset_t lmask; 514 int eflags, i; 515 516 regs = td->td_frame; 517 518 #ifdef DEBUG 519 if (ldebug(sigreturn)) 520 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 521 #endif 522 /* 523 * The trampoline code hands us the sigframe. 524 * It is unsafe to keep track of it ourselves, in the event that a 525 * program jumps out of a signal handler. 526 */ 527 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 528 return (EFAULT); 529 530 /* 531 * Check for security violations. 532 */ 533 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 534 eflags = frame.sf_sc.sc_eflags; 535 /* 536 * XXX do allow users to change the privileged flag PSL_RF. The 537 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 538 * sometimes set it there too. tf_eflags is kept in the signal 539 * context during signal handling and there is no other place 540 * to remember it, so the PSL_RF bit may be corrupted by the 541 * signal handler without us knowing. Corruption of the PSL_RF 542 * bit at worst causes one more or one less debugger trap, so 543 * allowing it is fairly harmless. 544 */ 545 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 546 return(EINVAL); 547 548 /* 549 * Don't allow users to load a valid privileged %cs. Let the 550 * hardware check for invalid selectors, excess privilege in 551 * other selectors, invalid %eip's and invalid %esp's. 552 */ 553 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 554 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 555 trapsignal(td, SIGBUS, T_PROTFLT); 556 return(EINVAL); 557 } 558 559 lmask.__bits[0] = frame.sf_sc.sc_mask; 560 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 561 lmask.__bits[i+1] = frame.sf_extramask[i]; 562 PROC_LOCK(p); 563 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 564 SIG_CANTMASK(td->td_sigmask); 565 signotify(td); 566 PROC_UNLOCK(p); 567 568 /* 569 * Restore signal context. 570 */ 571 /* %gs was restored by the trampoline. */ 572 regs->tf_fs = frame.sf_sc.sc_fs; 573 regs->tf_es = frame.sf_sc.sc_es; 574 regs->tf_ds = frame.sf_sc.sc_ds; 575 regs->tf_edi = frame.sf_sc.sc_edi; 576 regs->tf_esi = frame.sf_sc.sc_esi; 577 regs->tf_ebp = frame.sf_sc.sc_ebp; 578 regs->tf_ebx = frame.sf_sc.sc_ebx; 579 regs->tf_edx = frame.sf_sc.sc_edx; 580 regs->tf_ecx = frame.sf_sc.sc_ecx; 581 regs->tf_eax = frame.sf_sc.sc_eax; 582 regs->tf_eip = frame.sf_sc.sc_eip; 583 regs->tf_cs = frame.sf_sc.sc_cs; 584 regs->tf_eflags = eflags; 585 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 586 regs->tf_ss = frame.sf_sc.sc_ss; 587 588 return (EJUSTRETURN); 589 } 590 591 /* 592 * System call to cleanup state after a signal 593 * has been taken. Reset signal mask and 594 * stack state from context left by rt_sendsig (above). 595 * Return to previous pc and psl as specified by 596 * context left by sendsig. Check carefully to 597 * make sure that the user has not modified the 598 * psl to gain improper privileges or to cause 599 * a machine fault. 600 */ 601 int 602 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 603 { 604 struct proc *p = td->td_proc; 605 struct l_ucontext uc; 606 struct l_sigcontext *context; 607 l_stack_t *lss; 608 stack_t ss; 609 struct trapframe *regs; 610 int eflags; 611 612 regs = td->td_frame; 613 614 #ifdef DEBUG 615 if (ldebug(rt_sigreturn)) 616 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 617 #endif 618 /* 619 * The trampoline code hands us the ucontext. 620 * It is unsafe to keep track of it ourselves, in the event that a 621 * program jumps out of a signal handler. 622 */ 623 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 624 return (EFAULT); 625 626 context = &uc.uc_mcontext; 627 628 /* 629 * Check for security violations. 630 */ 631 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 632 eflags = context->sc_eflags; 633 /* 634 * XXX do allow users to change the privileged flag PSL_RF. The 635 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 636 * sometimes set it there too. tf_eflags is kept in the signal 637 * context during signal handling and there is no other place 638 * to remember it, so the PSL_RF bit may be corrupted by the 639 * signal handler without us knowing. Corruption of the PSL_RF 640 * bit at worst causes one more or one less debugger trap, so 641 * allowing it is fairly harmless. 642 */ 643 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 644 return(EINVAL); 645 646 /* 647 * Don't allow users to load a valid privileged %cs. Let the 648 * hardware check for invalid selectors, excess privilege in 649 * other selectors, invalid %eip's and invalid %esp's. 650 */ 651 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 652 if (!CS_SECURE(context->sc_cs)) { 653 trapsignal(td, SIGBUS, T_PROTFLT); 654 return(EINVAL); 655 } 656 657 PROC_LOCK(p); 658 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 659 SIG_CANTMASK(td->td_sigmask); 660 signotify(td); 661 PROC_UNLOCK(p); 662 663 /* 664 * Restore signal context 665 */ 666 /* %gs was restored by the trampoline. */ 667 regs->tf_fs = context->sc_fs; 668 regs->tf_es = context->sc_es; 669 regs->tf_ds = context->sc_ds; 670 regs->tf_edi = context->sc_edi; 671 regs->tf_esi = context->sc_esi; 672 regs->tf_ebp = context->sc_ebp; 673 regs->tf_ebx = context->sc_ebx; 674 regs->tf_edx = context->sc_edx; 675 regs->tf_ecx = context->sc_ecx; 676 regs->tf_eax = context->sc_eax; 677 regs->tf_eip = context->sc_eip; 678 regs->tf_cs = context->sc_cs; 679 regs->tf_eflags = eflags; 680 regs->tf_esp = context->sc_esp_at_signal; 681 regs->tf_ss = context->sc_ss; 682 683 /* 684 * call sigaltstack & ignore results.. 685 */ 686 lss = &uc.uc_stack; 687 ss.ss_sp = lss->ss_sp; 688 ss.ss_size = lss->ss_size; 689 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 690 691 #ifdef DEBUG 692 if (ldebug(rt_sigreturn)) 693 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 694 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 695 #endif 696 (void)kern_sigaltstack(td, &ss, NULL); 697 698 return (EJUSTRETURN); 699 } 700 701 /* 702 * MPSAFE 703 */ 704 static void 705 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 706 { 707 args[0] = tf->tf_ebx; 708 args[1] = tf->tf_ecx; 709 args[2] = tf->tf_edx; 710 args[3] = tf->tf_esi; 711 args[4] = tf->tf_edi; 712 args[5] = tf->tf_ebp; /* Unconfirmed */ 713 *params = NULL; /* no copyin */ 714 } 715 716 717 718 /* 719 * Dump core, into a file named as described in the comments for 720 * expand_name(), unless the process was setuid/setgid. 721 */ 722 static int 723 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 724 { 725 struct proc *p = td->td_proc; 726 struct ucred *cred = td->td_ucred; 727 struct vmspace *vm = p->p_vmspace; 728 char *tempuser; 729 int error; 730 731 if (ctob((uarea_pages + kstack_pages) + 732 vm->vm_dsize + vm->vm_ssize) >= limit) 733 return (EFAULT); 734 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 735 M_WAITOK | M_ZERO); 736 if (tempuser == NULL) 737 return (ENOMEM); 738 PROC_LOCK(p); 739 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 740 PROC_UNLOCK(p); 741 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 742 bcopy(td->td_frame, 743 tempuser + ctob(uarea_pages) + 744 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 745 sizeof(struct trapframe)); 746 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 747 ctob(uarea_pages + kstack_pages), 748 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 749 (int *)NULL, td); 750 free(tempuser, M_TEMP); 751 if (error == 0) 752 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 753 (int)ctob(vm->vm_dsize), 754 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 755 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 756 if (error == 0) 757 error = vn_rdwr_inchunks(UIO_WRITE, vp, 758 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 759 round_page(ctob(vm->vm_ssize)), 760 (off_t)ctob(uarea_pages + kstack_pages) + 761 ctob(vm->vm_dsize), UIO_USERSPACE, 762 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 763 return (error); 764 } 765 /* 766 * If a linux binary is exec'ing something, try this image activator 767 * first. We override standard shell script execution in order to 768 * be able to modify the interpreter path. We only do this if a linux 769 * binary is doing the exec, so we do not create an EXEC module for it. 770 */ 771 static int exec_linux_imgact_try(struct image_params *iparams); 772 773 static int 774 exec_linux_imgact_try(struct image_params *imgp) 775 { 776 const char *head = (const char *)imgp->image_header; 777 int error = -1; 778 779 /* 780 * The interpreter for shell scripts run from a linux binary needs 781 * to be located in /compat/linux if possible in order to recursively 782 * maintain linux path emulation. 783 */ 784 if (((const short *)head)[0] == SHELLMAGIC) { 785 /* 786 * Run our normal shell image activator. If it succeeds attempt 787 * to use the alternate path for the interpreter. If an alternate 788 * path is found, use our stringspace to store it. 789 */ 790 if ((error = exec_shell_imgact(imgp)) == 0) { 791 char *rpath = NULL; 792 793 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 794 imgp->interpreter_name, &rpath, 0); 795 if (rpath != imgp->interpreter_name) { 796 int len = strlen(rpath) + 1; 797 798 if (len <= MAXSHELLCMDLEN) { 799 memcpy(imgp->interpreter_name, rpath, len); 800 } 801 free(rpath, M_TEMP); 802 } 803 } 804 } 805 return(error); 806 } 807 808 struct sysentvec linux_sysvec = { 809 LINUX_SYS_MAXSYSCALL, 810 linux_sysent, 811 0xff, 812 LINUX_SIGTBLSZ, 813 bsd_to_linux_signal, 814 ELAST + 1, 815 bsd_to_linux_errno, 816 translate_traps, 817 linux_fixup, 818 linux_sendsig, 819 linux_sigcode, 820 &linux_szsigcode, 821 linux_prepsyscall, 822 "Linux a.out", 823 linux_aout_coredump, 824 exec_linux_imgact_try, 825 LINUX_MINSIGSTKSZ, 826 PAGE_SIZE, 827 VM_MIN_ADDRESS, 828 VM_MAXUSER_ADDRESS, 829 USRSTACK, 830 PS_STRINGS, 831 VM_PROT_ALL, 832 exec_copyout_strings, 833 exec_setregs 834 }; 835 836 struct sysentvec elf_linux_sysvec = { 837 LINUX_SYS_MAXSYSCALL, 838 linux_sysent, 839 0xff, 840 LINUX_SIGTBLSZ, 841 bsd_to_linux_signal, 842 ELAST + 1, 843 bsd_to_linux_errno, 844 translate_traps, 845 elf_linux_fixup, 846 linux_sendsig, 847 linux_sigcode, 848 &linux_szsigcode, 849 linux_prepsyscall, 850 "Linux ELF", 851 elf32_coredump, 852 exec_linux_imgact_try, 853 LINUX_MINSIGSTKSZ, 854 PAGE_SIZE, 855 VM_MIN_ADDRESS, 856 VM_MAXUSER_ADDRESS, 857 USRSTACK, 858 PS_STRINGS, 859 VM_PROT_ALL, 860 exec_copyout_strings, 861 exec_setregs 862 }; 863 864 static Elf32_Brandinfo linux_brand = { 865 ELFOSABI_LINUX, 866 EM_386, 867 "Linux", 868 "/compat/linux", 869 "/lib/ld-linux.so.1", 870 &elf_linux_sysvec 871 }; 872 873 static Elf32_Brandinfo linux_glibc2brand = { 874 ELFOSABI_LINUX, 875 EM_386, 876 "Linux", 877 "/compat/linux", 878 "/lib/ld-linux.so.2", 879 &elf_linux_sysvec 880 }; 881 882 Elf32_Brandinfo *linux_brandlist[] = { 883 &linux_brand, 884 &linux_glibc2brand, 885 NULL 886 }; 887 888 static int 889 linux_elf_modevent(module_t mod, int type, void *data) 890 { 891 Elf32_Brandinfo **brandinfo; 892 int error; 893 struct linux_ioctl_handler **lihp; 894 895 error = 0; 896 897 switch(type) { 898 case MOD_LOAD: 899 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 900 ++brandinfo) 901 if (elf32_insert_brand_entry(*brandinfo) < 0) 902 error = EINVAL; 903 if (error == 0) { 904 SET_FOREACH(lihp, linux_ioctl_handler_set) 905 linux_ioctl_register_handler(*lihp); 906 if (bootverbose) 907 printf("Linux ELF exec handler installed\n"); 908 } else 909 printf("cannot insert Linux ELF brand handler\n"); 910 break; 911 case MOD_UNLOAD: 912 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 913 ++brandinfo) 914 if (elf32_brand_inuse(*brandinfo)) 915 error = EBUSY; 916 if (error == 0) { 917 for (brandinfo = &linux_brandlist[0]; 918 *brandinfo != NULL; ++brandinfo) 919 if (elf32_remove_brand_entry(*brandinfo) < 0) 920 error = EINVAL; 921 } 922 if (error == 0) { 923 SET_FOREACH(lihp, linux_ioctl_handler_set) 924 linux_ioctl_unregister_handler(*lihp); 925 if (bootverbose) 926 printf("Linux ELF exec handler removed\n"); 927 linux_mib_destroy(); 928 } else 929 printf("Could not deinstall ELF interpreter entry\n"); 930 break; 931 default: 932 break; 933 } 934 return error; 935 } 936 937 static moduledata_t linux_elf_mod = { 938 "linuxelf", 939 linux_elf_modevent, 940 0 941 }; 942 943 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 944