/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/* XXX we use functions that might not exist. */
#include "opt_compat.h"

#ifndef COMPAT_43
#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/imgact.h> 41 #include <sys/imgact_aout.h> 42 #include <sys/imgact_elf.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mutex.h> 46 #include <sys/proc.h> 47 #include <sys/signalvar.h> 48 #include <sys/syscallsubr.h> 49 #include <sys/sysent.h> 50 #include <sys/sysproto.h> 51 #include <sys/user.h> 52 #include <sys/vnode.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_param.h> 56 #include <vm/vm_page.h> 57 #include <vm/vm_extern.h> 58 #include <sys/exec.h> 59 #include <sys/kernel.h> 60 #include <sys/module.h> 61 #include <machine/cpu.h> 62 #include <machine/md_var.h> 63 #include <sys/mutex.h> 64 65 #include <vm/vm.h> 66 #include <vm/vm_param.h> 67 #include <vm/pmap.h> 68 #include <vm/vm_map.h> 69 #include <vm/vm_object.h> 70 71 #include <i386/linux/linux.h> 72 #include <i386/linux/linux_proto.h> 73 #include <compat/linux/linux_signal.h> 74 #include <compat/linux/linux_util.h> 75 76 MODULE_VERSION(linux, 1); 77 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 78 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 79 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 80 81 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 82 83 #if BYTE_ORDER == LITTLE_ENDIAN 84 #define SHELLMAGIC 0x2123 /* #! */ 85 #else 86 #define SHELLMAGIC 0x2321 87 #endif 88 89 /* 90 * Allow the sendsig functions to use the ldebug() facility 91 * even though they are not syscalls themselves. Map them 92 * to syscall 0. This is slightly less bogus than using 93 * ldebug(sigreturn). 
 */
#define LINUX_SYS_linux_rt_sendsig	0
#define LINUX_SYS_linux_sendsig	0

/* Signal trampoline code and its size; both are defined outside this file. */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* System call table, LINUX_SYS_MAXSYSCALL entries; defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/* Linker set walked by linux_elf_modevent() on module load and unload. */
SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void	linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);

/*
 * Linux syscalls return negative errno's, we do positive and map them
 *
 * The table holds ELAST + 1 entries and is listed in both sysentvec
 * initializers below.
 * NOTE(review): presumably indexed by the native errno value -- confirm
 * against the code that consumes the sysentvec errno table.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	-6,  -6, -43, -42, -75,  -6, -84
};

/*
 * Native -> Linux signal numbers.  This table is the signal table named
 * in both sysentvec initializers; the sendsig routines index that table
 * with _SIG_IDX(sig).
 * NOTE(review): a 0 entry presumably marks a native signal without a
 * Linux counterpart -- confirm.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};

/*
 * Linux -> native signal numbers; same size as the table above.  It is
 * not referenced in this part of the file.
 * NOTE(review): presumably indexed the same way (signal number minus
 * one) -- confirm against its users.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, 0
};

/* Value reported for native trap codes that have no entry below. */
#define LINUX_T_UNKNOWN  255
/* Native trap code (the index) -> value stored in sc_trapno by sendsig. */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup in the table above: a code at or beyond the end
 * of the table yields LINUX_T_UNKNOWN.  Note that (code) is evaluated
 * twice.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

/*
 * If FreeBSD & Linux have a difference of opinion about what a trap
 * means, deal with it here.
193 * 194 * MPSAFE 195 */ 196 static int 197 translate_traps(int signal, int trap_code) 198 { 199 if (signal != SIGBUS) 200 return signal; 201 switch (trap_code) { 202 case T_PROTFLT: 203 case T_TSSFLT: 204 case T_DOUBLEFLT: 205 case T_PAGEFLT: 206 return SIGSEGV; 207 default: 208 return signal; 209 } 210 } 211 212 static int 213 linux_fixup(register_t **stack_base, struct image_params *imgp) 214 { 215 register_t *argv, *envp; 216 217 argv = *stack_base; 218 envp = *stack_base + (imgp->argc + 1); 219 (*stack_base)--; 220 **stack_base = (intptr_t)(void *)envp; 221 (*stack_base)--; 222 **stack_base = (intptr_t)(void *)argv; 223 (*stack_base)--; 224 **stack_base = imgp->argc; 225 return 0; 226 } 227 228 static int 229 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 230 { 231 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs; 232 register_t *pos; 233 234 pos = *stack_base + (imgp->argc + imgp->envc + 2); 235 236 if (args->trace) 237 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 238 if (args->execfd != -1) 239 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 240 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 241 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 242 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 243 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 244 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 245 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 246 AUXARGS_ENTRY(pos, AT_BASE, args->base); 247 PROC_LOCK(imgp->proc); 248 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 249 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 250 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 251 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 252 PROC_UNLOCK(imgp->proc); 253 AUXARGS_ENTRY(pos, AT_NULL, 0); 254 255 free(imgp->auxargs, M_TEMP); 256 imgp->auxargs = NULL; 257 258 (*stack_base)--; 259 **stack_base = (long)imgp->argc; 260 return 0; 261 } 262 263 extern int _ucodesel, _udatasel; 264 extern unsigned long linux_sznonrtsigcode; 265 266 static void 
267 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 268 { 269 register struct thread *td = curthread; 270 register struct proc *p = td->td_proc; 271 register struct trapframe *regs; 272 struct l_rt_sigframe *fp, frame; 273 int oonstack; 274 275 PROC_LOCK_ASSERT(p, MA_OWNED); 276 regs = td->td_frame; 277 oonstack = sigonstack(regs->tf_esp); 278 279 #ifdef DEBUG 280 if (ldebug(rt_sendsig)) 281 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 282 catcher, sig, (void*)mask, code); 283 #endif 284 /* 285 * Allocate space for the signal handler context. 286 */ 287 if ((p->p_flag & P_ALTSTACK) && !oonstack && 288 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 289 fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp + 290 p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 291 } else 292 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 293 PROC_UNLOCK(p); 294 295 /* 296 * Build the argument list for the signal handler. 297 */ 298 if (p->p_sysent->sv_sigtbl) 299 if (sig <= p->p_sysent->sv_sigsize) 300 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 301 302 frame.sf_handler = catcher; 303 frame.sf_sig = sig; 304 frame.sf_siginfo = &fp->sf_si; 305 frame.sf_ucontext = &fp->sf_sc; 306 307 /* Fill in POSIX parts */ 308 frame.sf_si.lsi_signo = sig; 309 frame.sf_si.lsi_code = code; 310 frame.sf_si.lsi_addr = (void *)regs->tf_err; 311 312 /* 313 * Build the signal context to be used by sigreturn. 314 */ 315 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 316 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 317 318 PROC_LOCK(p); 319 frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp; 320 frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size; 321 frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK) 322 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 323 PROC_UNLOCK(p); 324 325 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 326 327 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 328 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 329 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 330 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 331 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 332 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 333 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 334 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 335 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 336 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 337 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 338 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 339 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 340 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 341 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 342 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 343 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 344 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 345 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 346 347 #ifdef DEBUG 348 if (ldebug(rt_sendsig)) 349 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 350 frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp, 351 p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 352 #endif 353 354 if (copyout(&frame, fp, sizeof(frame)) != 0) { 355 /* 356 * Process has trashed its stack; give it an illegal 357 * instruction to halt it in its tracks. 358 */ 359 #ifdef DEBUG 360 if (ldebug(rt_sendsig)) 361 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 362 fp, oonstack); 363 #endif 364 PROC_LOCK(p); 365 sigexit(td, SIGILL); 366 } 367 368 /* 369 * Build context to run handler in. 
370 */ 371 regs->tf_esp = (int)fp; 372 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 373 linux_sznonrtsigcode; 374 regs->tf_eflags &= ~(PSL_T | PSL_VM); 375 regs->tf_cs = _ucodesel; 376 regs->tf_ds = _udatasel; 377 regs->tf_es = _udatasel; 378 regs->tf_fs = _udatasel; 379 regs->tf_ss = _udatasel; 380 PROC_LOCK(p); 381 } 382 383 384 /* 385 * Send an interrupt to process. 386 * 387 * Stack is set up to allow sigcode stored 388 * in u. to call routine, followed by kcall 389 * to sigreturn routine below. After sigreturn 390 * resets the signal mask, the stack, and the 391 * frame pointer, it returns to the user 392 * specified pc, psl. 393 */ 394 395 static void 396 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 397 { 398 register struct thread *td = curthread; 399 register struct proc *p = td->td_proc; 400 register struct trapframe *regs; 401 struct l_sigframe *fp, frame; 402 l_sigset_t lmask; 403 int oonstack, i; 404 405 PROC_LOCK_ASSERT(p, MA_OWNED); 406 if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) { 407 /* Signal handler installed with SA_SIGINFO. */ 408 linux_rt_sendsig(catcher, sig, mask, code); 409 return; 410 } 411 412 regs = td->td_frame; 413 oonstack = sigonstack(regs->tf_esp); 414 415 #ifdef DEBUG 416 if (ldebug(sendsig)) 417 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 418 catcher, sig, (void*)mask, code); 419 #endif 420 421 /* 422 * Allocate space for the signal handler context. 423 */ 424 if ((p->p_flag & P_ALTSTACK) && !oonstack && 425 SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) { 426 fp = (struct l_sigframe *)(p->p_sigstk.ss_sp + 427 p->p_sigstk.ss_size - sizeof(struct l_sigframe)); 428 } else 429 fp = (struct l_sigframe *)regs->tf_esp - 1; 430 PROC_UNLOCK(p); 431 432 /* 433 * Build the argument list for the signal handler. 
434 */ 435 if (p->p_sysent->sv_sigtbl) 436 if (sig <= p->p_sysent->sv_sigsize) 437 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 438 439 frame.sf_handler = catcher; 440 frame.sf_sig = sig; 441 442 bsd_to_linux_sigset(mask, &lmask); 443 444 /* 445 * Build the signal context to be used by sigreturn. 446 */ 447 frame.sf_sc.sc_mask = lmask.__bits[0]; 448 frame.sf_sc.sc_gs = rgs(); 449 frame.sf_sc.sc_fs = regs->tf_fs; 450 frame.sf_sc.sc_es = regs->tf_es; 451 frame.sf_sc.sc_ds = regs->tf_ds; 452 frame.sf_sc.sc_edi = regs->tf_edi; 453 frame.sf_sc.sc_esi = regs->tf_esi; 454 frame.sf_sc.sc_ebp = regs->tf_ebp; 455 frame.sf_sc.sc_ebx = regs->tf_ebx; 456 frame.sf_sc.sc_edx = regs->tf_edx; 457 frame.sf_sc.sc_ecx = regs->tf_ecx; 458 frame.sf_sc.sc_eax = regs->tf_eax; 459 frame.sf_sc.sc_eip = regs->tf_eip; 460 frame.sf_sc.sc_cs = regs->tf_cs; 461 frame.sf_sc.sc_eflags = regs->tf_eflags; 462 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 463 frame.sf_sc.sc_ss = regs->tf_ss; 464 frame.sf_sc.sc_err = regs->tf_err; 465 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 466 467 bzero(&frame.sf_fpstate, sizeof(struct l_fpstate)); 468 469 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 470 frame.sf_extramask[i] = lmask.__bits[i+1]; 471 472 if (copyout(&frame, fp, sizeof(frame)) != 0) { 473 /* 474 * Process has trashed its stack; give it an illegal 475 * instruction to halt it in its tracks. 476 */ 477 PROC_LOCK(p); 478 sigexit(td, SIGILL); 479 } 480 481 /* 482 * Build context to run handler in. 483 */ 484 regs->tf_esp = (int)fp; 485 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 486 regs->tf_eflags &= ~(PSL_T | PSL_VM); 487 regs->tf_cs = _ucodesel; 488 regs->tf_ds = _udatasel; 489 regs->tf_es = _udatasel; 490 regs->tf_fs = _udatasel; 491 regs->tf_ss = _udatasel; 492 PROC_LOCK(p); 493 } 494 495 /* 496 * System call to cleanup state after a signal 497 * has been taken. Reset signal mask and 498 * stack state from context left by sendsig (above). 
499 * Return to previous pc and psl as specified by 500 * context left by sendsig. Check carefully to 501 * make sure that the user has not modified the 502 * psl to gain improper privileges or to cause 503 * a machine fault. 504 */ 505 int 506 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 507 { 508 struct proc *p = td->td_proc; 509 struct l_sigframe frame; 510 register struct trapframe *regs; 511 l_sigset_t lmask; 512 int eflags, i; 513 514 regs = td->td_frame; 515 516 #ifdef DEBUG 517 if (ldebug(sigreturn)) 518 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 519 #endif 520 /* 521 * The trampoline code hands us the sigframe. 522 * It is unsafe to keep track of it ourselves, in the event that a 523 * program jumps out of a signal handler. 524 */ 525 if (copyin((caddr_t)args->sfp, &frame, sizeof(frame)) != 0) 526 return (EFAULT); 527 528 /* 529 * Check for security violations. 530 */ 531 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 532 eflags = frame.sf_sc.sc_eflags; 533 /* 534 * XXX do allow users to change the privileged flag PSL_RF. The 535 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 536 * sometimes set it there too. tf_eflags is kept in the signal 537 * context during signal handling and there is no other place 538 * to remember it, so the PSL_RF bit may be corrupted by the 539 * signal handler without us knowing. Corruption of the PSL_RF 540 * bit at worst causes one more or one less debugger trap, so 541 * allowing it is fairly harmless. 542 */ 543 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 544 return(EINVAL); 545 546 /* 547 * Don't allow users to load a valid privileged %cs. Let the 548 * hardware check for invalid selectors, excess privilege in 549 * other selectors, invalid %eip's and invalid %esp's. 
550 */ 551 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 552 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 553 trapsignal(p, SIGBUS, T_PROTFLT); 554 return(EINVAL); 555 } 556 557 lmask.__bits[0] = frame.sf_sc.sc_mask; 558 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 559 lmask.__bits[i+1] = frame.sf_extramask[i]; 560 PROC_LOCK(p); 561 linux_to_bsd_sigset(&lmask, &p->p_sigmask); 562 SIG_CANTMASK(p->p_sigmask); 563 signotify(p); 564 PROC_UNLOCK(p); 565 566 /* 567 * Restore signal context. 568 */ 569 /* %gs was restored by the trampoline. */ 570 regs->tf_fs = frame.sf_sc.sc_fs; 571 regs->tf_es = frame.sf_sc.sc_es; 572 regs->tf_ds = frame.sf_sc.sc_ds; 573 regs->tf_edi = frame.sf_sc.sc_edi; 574 regs->tf_esi = frame.sf_sc.sc_esi; 575 regs->tf_ebp = frame.sf_sc.sc_ebp; 576 regs->tf_ebx = frame.sf_sc.sc_ebx; 577 regs->tf_edx = frame.sf_sc.sc_edx; 578 regs->tf_ecx = frame.sf_sc.sc_ecx; 579 regs->tf_eax = frame.sf_sc.sc_eax; 580 regs->tf_eip = frame.sf_sc.sc_eip; 581 regs->tf_cs = frame.sf_sc.sc_cs; 582 regs->tf_eflags = eflags; 583 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 584 regs->tf_ss = frame.sf_sc.sc_ss; 585 586 return (EJUSTRETURN); 587 } 588 589 /* 590 * System call to cleanup state after a signal 591 * has been taken. Reset signal mask and 592 * stack state from context left by rt_sendsig (above). 593 * Return to previous pc and psl as specified by 594 * context left by sendsig. Check carefully to 595 * make sure that the user has not modified the 596 * psl to gain improper privileges or to cause 597 * a machine fault. 
598 */ 599 int 600 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 601 { 602 struct proc *p = td->td_proc; 603 struct l_ucontext uc; 604 struct l_sigcontext *context; 605 l_stack_t *lss; 606 stack_t ss; 607 register struct trapframe *regs; 608 int eflags; 609 610 regs = td->td_frame; 611 612 #ifdef DEBUG 613 if (ldebug(rt_sigreturn)) 614 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 615 #endif 616 /* 617 * The trampoline code hands us the ucontext. 618 * It is unsafe to keep track of it ourselves, in the event that a 619 * program jumps out of a signal handler. 620 */ 621 if (copyin((caddr_t)args->ucp, &uc, sizeof(uc)) != 0) 622 return (EFAULT); 623 624 context = &uc.uc_mcontext; 625 626 /* 627 * Check for security violations. 628 */ 629 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 630 eflags = context->sc_eflags; 631 /* 632 * XXX do allow users to change the privileged flag PSL_RF. The 633 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 634 * sometimes set it there too. tf_eflags is kept in the signal 635 * context during signal handling and there is no other place 636 * to remember it, so the PSL_RF bit may be corrupted by the 637 * signal handler without us knowing. Corruption of the PSL_RF 638 * bit at worst causes one more or one less debugger trap, so 639 * allowing it is fairly harmless. 640 */ 641 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 642 return(EINVAL); 643 644 /* 645 * Don't allow users to load a valid privileged %cs. Let the 646 * hardware check for invalid selectors, excess privilege in 647 * other selectors, invalid %eip's and invalid %esp's. 
648 */ 649 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 650 if (!CS_SECURE(context->sc_cs)) { 651 trapsignal(p, SIGBUS, T_PROTFLT); 652 return(EINVAL); 653 } 654 655 PROC_LOCK(p); 656 linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask); 657 SIG_CANTMASK(p->p_sigmask); 658 signotify(p); 659 PROC_UNLOCK(p); 660 661 /* 662 * Restore signal context 663 */ 664 /* %gs was restored by the trampoline. */ 665 regs->tf_fs = context->sc_fs; 666 regs->tf_es = context->sc_es; 667 regs->tf_ds = context->sc_ds; 668 regs->tf_edi = context->sc_edi; 669 regs->tf_esi = context->sc_esi; 670 regs->tf_ebp = context->sc_ebp; 671 regs->tf_ebx = context->sc_ebx; 672 regs->tf_edx = context->sc_edx; 673 regs->tf_ecx = context->sc_ecx; 674 regs->tf_eax = context->sc_eax; 675 regs->tf_eip = context->sc_eip; 676 regs->tf_cs = context->sc_cs; 677 regs->tf_eflags = eflags; 678 regs->tf_esp = context->sc_esp_at_signal; 679 regs->tf_ss = context->sc_ss; 680 681 /* 682 * call sigaltstack & ignore results.. 683 */ 684 lss = &uc.uc_stack; 685 ss.ss_sp = lss->ss_sp; 686 ss.ss_size = lss->ss_size; 687 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 688 689 #ifdef DEBUG 690 if (ldebug(rt_sigreturn)) 691 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 692 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 693 #endif 694 (void)kern_sigaltstack(td, &ss, NULL); 695 696 return (EJUSTRETURN); 697 } 698 699 /* 700 * MPSAFE 701 */ 702 static void 703 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 704 { 705 args[0] = tf->tf_ebx; 706 args[1] = tf->tf_ecx; 707 args[2] = tf->tf_edx; 708 args[3] = tf->tf_esi; 709 args[4] = tf->tf_edi; 710 args[5] = tf->tf_ebp; /* Unconfirmed */ 711 *params = NULL; /* no copyin */ 712 } 713 714 715 716 /* 717 * Dump core, into a file named as described in the comments for 718 * expand_name(), unless the process was setuid/setgid. 
719 */ 720 static int 721 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 722 { 723 struct proc *p = td->td_proc; 724 struct ucred *cred = td->td_ucred; 725 struct vmspace *vm = p->p_vmspace; 726 char *tempuser; 727 int error; 728 729 if (ctob((uarea_pages + kstack_pages) + 730 vm->vm_dsize + vm->vm_ssize) >= limit) 731 return (EFAULT); 732 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 733 M_WAITOK | M_ZERO); 734 if (tempuser == NULL) 735 return (ENOMEM); 736 PROC_LOCK(p); 737 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 738 PROC_UNLOCK(p); 739 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 740 bcopy(td->td_frame, 741 tempuser + ctob(uarea_pages) + 742 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 743 sizeof(struct trapframe)); 744 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 745 ctob(uarea_pages + kstack_pages), 746 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 747 (int *)NULL, td); 748 free(tempuser, M_TEMP); 749 if (error == 0) 750 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 751 (int)ctob(vm->vm_dsize), 752 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 753 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 754 if (error == 0) 755 error = vn_rdwr_inchunks(UIO_WRITE, vp, 756 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 757 round_page(ctob(vm->vm_ssize)), 758 (off_t)ctob(uarea_pages + kstack_pages) + 759 ctob(vm->vm_dsize), UIO_USERSPACE, 760 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 761 return (error); 762 } 763 /* 764 * If a linux binary is exec'ing something, try this image activator 765 * first. We override standard shell script execution in order to 766 * be able to modify the interpreter path. We only do this if a linux 767 * binary is doing the exec, so we do not create an EXEC module for it. 
768 */ 769 static int exec_linux_imgact_try(struct image_params *iparams); 770 771 static int 772 exec_linux_imgact_try(struct image_params *imgp) 773 { 774 const char *head = (const char *)imgp->image_header; 775 int error = -1; 776 777 /* 778 * The interpreter for shell scripts run from a linux binary needs 779 * to be located in /compat/linux if possible in order to recursively 780 * maintain linux path emulation. 781 */ 782 if (((const short *)head)[0] == SHELLMAGIC) { 783 /* 784 * Run our normal shell image activator. If it succeeds attempt 785 * to use the alternate path for the interpreter. If an alternate 786 * path is found, use our stringspace to store it. 787 */ 788 if ((error = exec_shell_imgact(imgp)) == 0) { 789 char *rpath = NULL; 790 791 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 792 imgp->interpreter_name, &rpath, 0); 793 if (rpath != imgp->interpreter_name) { 794 int len = strlen(rpath) + 1; 795 796 if (len <= MAXSHELLCMDLEN) { 797 memcpy(imgp->interpreter_name, rpath, len); 798 } 799 free(rpath, M_TEMP); 800 } 801 } 802 } 803 return(error); 804 } 805 806 struct sysentvec linux_sysvec = { 807 LINUX_SYS_MAXSYSCALL, 808 linux_sysent, 809 0xff, 810 LINUX_SIGTBLSZ, 811 bsd_to_linux_signal, 812 ELAST + 1, 813 bsd_to_linux_errno, 814 translate_traps, 815 linux_fixup, 816 linux_sendsig, 817 linux_sigcode, 818 &linux_szsigcode, 819 linux_prepsyscall, 820 "Linux a.out", 821 linux_aout_coredump, 822 exec_linux_imgact_try, 823 LINUX_MINSIGSTKSZ, 824 PAGE_SIZE, 825 VM_MIN_ADDRESS, 826 VM_MAXUSER_ADDRESS, 827 USRSTACK, 828 PS_STRINGS, 829 VM_PROT_ALL, 830 exec_copyout_strings, 831 exec_setregs 832 }; 833 834 struct sysentvec elf_linux_sysvec = { 835 LINUX_SYS_MAXSYSCALL, 836 linux_sysent, 837 0xff, 838 LINUX_SIGTBLSZ, 839 bsd_to_linux_signal, 840 ELAST + 1, 841 bsd_to_linux_errno, 842 translate_traps, 843 elf_linux_fixup, 844 linux_sendsig, 845 linux_sigcode, 846 &linux_szsigcode, 847 linux_prepsyscall, 848 "Linux ELF", 849 elf32_coredump, 850 
exec_linux_imgact_try, 851 LINUX_MINSIGSTKSZ, 852 PAGE_SIZE, 853 VM_MIN_ADDRESS, 854 VM_MAXUSER_ADDRESS, 855 USRSTACK, 856 PS_STRINGS, 857 VM_PROT_ALL, 858 exec_copyout_strings, 859 exec_setregs 860 }; 861 862 static Elf32_Brandinfo linux_brand = { 863 ELFOSABI_LINUX, 864 EM_386, 865 "Linux", 866 "/compat/linux", 867 "/lib/ld-linux.so.1", 868 &elf_linux_sysvec 869 }; 870 871 static Elf32_Brandinfo linux_glibc2brand = { 872 ELFOSABI_LINUX, 873 EM_386, 874 "Linux", 875 "/compat/linux", 876 "/lib/ld-linux.so.2", 877 &elf_linux_sysvec 878 }; 879 880 Elf32_Brandinfo *linux_brandlist[] = { 881 &linux_brand, 882 &linux_glibc2brand, 883 NULL 884 }; 885 886 static int 887 linux_elf_modevent(module_t mod, int type, void *data) 888 { 889 Elf32_Brandinfo **brandinfo; 890 int error; 891 struct linux_ioctl_handler **lihp; 892 893 error = 0; 894 895 switch(type) { 896 case MOD_LOAD: 897 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 898 ++brandinfo) 899 if (elf32_insert_brand_entry(*brandinfo) < 0) 900 error = EINVAL; 901 if (error == 0) { 902 SET_FOREACH(lihp, linux_ioctl_handler_set) 903 linux_ioctl_register_handler(*lihp); 904 if (bootverbose) 905 printf("Linux ELF exec handler installed\n"); 906 } else 907 printf("cannot insert Linux ELF brand handler\n"); 908 break; 909 case MOD_UNLOAD: 910 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 911 ++brandinfo) 912 if (elf32_brand_inuse(*brandinfo)) 913 error = EBUSY; 914 if (error == 0) { 915 for (brandinfo = &linux_brandlist[0]; 916 *brandinfo != NULL; ++brandinfo) 917 if (elf32_remove_brand_entry(*brandinfo) < 0) 918 error = EINVAL; 919 } 920 if (error == 0) { 921 SET_FOREACH(lihp, linux_ioctl_handler_set) 922 linux_ioctl_unregister_handler(*lihp); 923 if (bootverbose) 924 printf("Linux ELF exec handler removed\n"); 925 } else 926 printf("Could not deinstall ELF interpreter entry\n"); 927 break; 928 default: 929 break; 930 } 931 return error; 932 } 933 934 static moduledata_t linux_elf_mod = { 935 
"linuxelf", 936 linux_elf_modevent, 937 0 938 }; 939 940 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 941