/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

/* XXX we use functions that might not exist. */
#include "opt_compat.h"

#ifndef COMPAT_43
#error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
#endif

/*
 * NOTE(review): <sys/mutex.h>, <vm/vm.h> and <vm/vm_param.h> each appear
 * twice in the include list below.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/imgact.h>
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/user.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_page.h>
#include <vm/vm_extern.h>
#include <sys/exec.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <sys/mutex.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>

MODULE_VERSION(linux, 1);
MODULE_DEPEND(linux, sysvmsg, 1, 1, 1);
MODULE_DEPEND(linux, sysvsem, 1, 1, 1);
MODULE_DEPEND(linux, sysvshm, 1, 1, 1);

MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * The two bytes "#!" read as one short; the value depends on byte order.
 * Compared against the start of the image header in
 * exec_linux_imgact_try().
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
 */
#define	LINUX_SYS_linux_rt_sendsig	0
#define	LINUX_SYS_linux_sendsig		0

extern char linux_sigcode[];
extern int linux_szsigcode;

extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

static int	linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static int	elf_linux_fixup(register_t **stack_base,
		    struct image_params *iparams);
static void	linux_prepsyscall(struct trapframe *tf, int *args, u_int *code,
		    caddr_t *params);
static void	linux_sendsig(sig_t catcher, int sig, sigset_t *mask,
		    u_long code);

/*
 * Linux syscalls return negative errno's, we do positive and map them
 *
 * The table is sized ELAST + 1 and every entry is the negated Linux
 * value, so it is presumably indexed by the (positive) BSD errno --
 * TODO confirm against the consumer of the sysentvec error table.
 */
static int bsd_to_linux_errno[ELAST + 1] = {
	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
	-6, -6, -43, -42, -75, -6, -84
};

/*
 * BSD -> Linux signal numbers.  The sendsig routines in this file look a
 * signal up as sv_sigtbl[_SIG_IDX(sig)]; both sysentvecs below list this
 * array right after LINUX_SIGTBLSZ, so it is presumably that table
 * (NOTE(review): confirm against struct sysentvec).  Two slots are 0.
 */
int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS,
	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
	0, LINUX_SIGUSR1, LINUX_SIGUSR2
};

/*
 * Linux -> BSD signal numbers; the reverse direction of the table above.
 * Not referenced elsewhere in this file.
 */
int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
	SIGHUP, SIGINT, SIGQUIT, SIGILL,
	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
	SIGIO, SIGURG, SIGSYS
};

/*
 * BSD trap code -> Linux trap number.  The index is the BSD T_* value
 * named in each entry's comment; entries with no counterpart hold
 * LINUX_T_UNKNOWN.
 */
#define LINUX_T_UNKNOWN  255
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Bounds-checked lookup: codes past the end of the table yield
 * LINUX_T_UNKNOWN.  Note that `code' is evaluated twice, so the argument
 * must be free of side effects.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

/*
 * If FreeBSD & Linux have a difference of opinion about what a trap
 * means, deal with it here.
 *
 * Only SIGBUS is ever rewritten: when it was raised for T_PROTFLT,
 * T_TSSFLT, T_DOUBLEFLT or T_PAGEFLT it is reported as SIGSEGV.  Every
 * other signal/trap combination is returned unchanged.
 *
 * MPSAFE
 */
static int
translate_traps(int signal, int trap_code)
{
	if (signal != SIGBUS)
		return signal;
	switch (trap_code) {
	case T_PROTFLT:
	case T_TSSFLT:
	case T_DOUBLEFLT:
	case T_PAGEFLT:
		return SIGSEGV;
	default:
		return signal;
	}
}

/*
 * Stack fixup used by linux_sysvec (the "Linux a.out" vector).
 *
 * On entry *stack_base points at argv[0].  Three words are pushed below
 * it, leaving *stack_base pointing at, from the lowest address up:
 * argc, the address of argv[], the address of envp[].  envp is taken to
 * start argc + 1 words above argv (past argv's terminating slot).
 * Always returns 0.
 */
static int
linux_fixup(register_t **stack_base, struct image_params *imgp)
{
	register_t *argv, *envp;

	argv = *stack_base;
	envp = *stack_base + (imgp->argc + 1);
	(*stack_base)--;
	**stack_base = (intptr_t)(void *)envp;
	(*stack_base)--;
	**stack_base = (intptr_t)(void *)argv;
	(*stack_base)--;
	**stack_base = imgp->argc;
	return 0;
}

/*
 * Stack fixup used by elf_linux_sysvec (the "Linux ELF" vector).
 *
 * Writes the ELF auxiliary vector starting argc + envc + 2 words above
 * *stack_base (i.e. past the argv and envp arrays and their terminators),
 * releases imgp->auxargs, and finally pushes argc below *stack_base.
 * Must run in the context of the process being exec'ed, which must not
 * be threaded (enforced by the KASSERT).  Always returns 0.
 */
static int
elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
{
	Elf32_Auxargs *args;
	register_t *pos;

	KASSERT(curthread->td_proc == imgp->proc &&
	    (curthread->td_proc->p_flag & P_THREADED) == 0,
	    ("unsafe elf_linux_fixup(), should be curproc"));
	args = (Elf32_Auxargs *)imgp->auxargs;
	pos = *stack_base + (imgp->argc + imgp->envc + 2);

	if (args->trace)
		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
	if (args->execfd != -1)
		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
	AUXARGS_ENTRY(pos, AT_BASE, args->base);
	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid);
	/*
	 * NOTE(review): AT_EUID/AT_EGID are filled from the *saved* ids
	 * (cr_svuid/cr_svgid), not from effective-id fields -- confirm
	 * this is intended.
	 */
	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid);
	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid);
	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid);
	AUXARGS_ENTRY(pos, AT_NULL, 0);

	/* The aux args were only needed to build the vector above. */
	free(imgp->auxargs, M_TEMP);
	imgp->auxargs = NULL;

	(*stack_base)--;
	**stack_base = (register_t)imgp->argc;
	return 0;
}

extern int _ucodesel, _udatasel;
extern unsigned long linux_sznonrtsigcode;

/*
 * Deliver a signal using the Linux "rt" frame layout
 * (struct l_rt_sigframe: handler arguments, siginfo and ucontext).
 * Reached from linux_sendsig() for handlers flagged in ps_siginfo.
 *
 * Locking: entered with the process lock held (asserted).  The lock is
 * dropped while the frame is built and copied out, retaken briefly to
 * sample the alternate-stack state, and held again on return.
 */
static void
linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	struct l_rt_sigframe *fp, frame;
	int oonstack;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	regs = td->td_frame;
	/* Remember whether we were already on the alternate stack. */
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif
	/*
	 * Allocate space for the signal handler context.
	 * Either at the top of the alternate signal stack, or directly
	 * below the current user stack pointer.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
		fp = (struct l_rt_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_rt_sigframe));
	} else
		fp = (struct l_rt_sigframe *)regs->tf_esp - 1;
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 * The signal number is first translated through the ABI's
	 * signal table, when one is installed and covers `sig'.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;
	/* These are user-space addresses inside the frame at fp. */
	frame.sf_siginfo = &fp->sf_si;
	frame.sf_ucontext = &fp->sf_sc;

	/* Fill in POSIX parts */
	frame.sf_si.lsi_signo = sig;
	frame.sf_si.lsi_code = code;
	/* NOTE(review): tf_err is what gets reported as lsi_addr. */
	frame.sf_si.lsi_addr = (void *)regs->tf_err;

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.uc_flags = 0;		/* XXX ??? */
	frame.sf_sc.uc_link = NULL;		/* XXX ??? */

	/* p_sigstk and p_flag are read under the process lock. */
	PROC_LOCK(p);
	frame.sf_sc.uc_stack.ss_sp = p->p_sigstk.ss_sp;
	frame.sf_sc.uc_stack.ss_size = p->p_sigstk.ss_size;
	frame.sf_sc.uc_stack.ss_flags = (p->p_flag & P_ALTSTACK)
	    ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE;
	PROC_UNLOCK(p);

	bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask);

	/* Snapshot the interrupted register state into the mcontext. */
	frame.sf_sc.uc_mcontext.sc_mask   = frame.sf_sc.uc_sigmask.__bits[0];
	frame.sf_sc.uc_mcontext.sc_gs     = rgs();
	frame.sf_sc.uc_mcontext.sc_fs     = regs->tf_fs;
	frame.sf_sc.uc_mcontext.sc_es     = regs->tf_es;
	frame.sf_sc.uc_mcontext.sc_ds     = regs->tf_ds;
	frame.sf_sc.uc_mcontext.sc_edi    = regs->tf_edi;
	frame.sf_sc.uc_mcontext.sc_esi    = regs->tf_esi;
	frame.sf_sc.uc_mcontext.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.uc_mcontext.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.uc_mcontext.sc_edx    = regs->tf_edx;
	frame.sf_sc.uc_mcontext.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.uc_mcontext.sc_eax    = regs->tf_eax;
	frame.sf_sc.uc_mcontext.sc_eip    = regs->tf_eip;
	frame.sf_sc.uc_mcontext.sc_cs     = regs->tf_cs;
	frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags;
	frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.uc_mcontext.sc_ss     = regs->tf_ss;
	frame.sf_sc.uc_mcontext.sc_err    = regs->tf_err;
	frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code);

#ifdef DEBUG
	if (ldebug(rt_sendsig))
		printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"),
		    frame.sf_sc.uc_stack.ss_flags, p->p_sigstk.ss_sp,
		    p->p_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask);
#endif

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
#ifdef DEBUG
		if (ldebug(rt_sendsig))
			printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"),
			    fp, oonstack);
#endif
		/* sigexit() is entered with the process lock held. */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 * The entry point is linux_sznonrtsigcode bytes past the start of
	 * the signal code located *sv_szsigcode below PS_STRINGS
	 * (presumably the rt half of the trampoline -- TODO confirm).
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) +
	    linux_sznonrtsigcode;
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Return to the caller with the process lock held, as on entry. */
	PROC_LOCK(p);
}


/*
 * Send an interrupt to process.
 *
 * Stack is set up to allow sigcode stored
 * in u. to call routine, followed by kcall
 * to sigreturn routine below.  After sigreturn
 * resets the signal mask, the stack, and the
 * frame pointer, it returns to the user
 * specified pc, psl.
 *
 * Handlers flagged in ps_siginfo are handed to linux_rt_sendsig();
 * everything else gets the classic struct l_sigframe built here.
 * Locking follows the same pattern as linux_rt_sendsig(): process lock
 * held on entry and on return, dropped around the copyout.
 */
static void
linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct trapframe *regs;
	struct l_sigframe *fp, frame;
	l_sigset_t lmask;
	int oonstack, i;

	PROC_LOCK_ASSERT(p, MA_OWNED);
	if (SIGISMEMBER(p->p_sigacts->ps_siginfo, sig)) {
		/* Signal handler installed with SA_SIGINFO. */
		linux_rt_sendsig(catcher, sig, mask, code);
		return;
	}

	regs = td->td_frame;
	oonstack = sigonstack(regs->tf_esp);

#ifdef DEBUG
	if (ldebug(sendsig))
		printf(ARGS(sendsig, "%p, %d, %p, %lu"),
		    catcher, sig, (void*)mask, code);
#endif

	/*
	 * Allocate space for the signal handler context.
	 */
	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
	    SIGISMEMBER(p->p_sigacts->ps_sigonstack, sig)) {
		fp = (struct l_sigframe *)(p->p_sigstk.ss_sp +
		    p->p_sigstk.ss_size - sizeof(struct l_sigframe));
	} else
		fp = (struct l_sigframe *)regs->tf_esp - 1;
	PROC_UNLOCK(p);

	/*
	 * Build the argument list for the signal handler.
	 */
	if (p->p_sysent->sv_sigtbl)
		if (sig <= p->p_sysent->sv_sigsize)
			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];

	bzero(&frame, sizeof(frame));

	frame.sf_handler = catcher;
	frame.sf_sig = sig;

	bsd_to_linux_sigset(mask, &lmask);

	/*
	 * Build the signal context to be used by sigreturn.
	 */
	frame.sf_sc.sc_mask   = lmask.__bits[0];
	frame.sf_sc.sc_gs     = rgs();
	frame.sf_sc.sc_fs     = regs->tf_fs;
	frame.sf_sc.sc_es     = regs->tf_es;
	frame.sf_sc.sc_ds     = regs->tf_ds;
	frame.sf_sc.sc_edi    = regs->tf_edi;
	frame.sf_sc.sc_esi    = regs->tf_esi;
	frame.sf_sc.sc_ebp    = regs->tf_ebp;
	frame.sf_sc.sc_ebx    = regs->tf_ebx;
	frame.sf_sc.sc_edx    = regs->tf_edx;
	frame.sf_sc.sc_ecx    = regs->tf_ecx;
	frame.sf_sc.sc_eax    = regs->tf_eax;
	frame.sf_sc.sc_eip    = regs->tf_eip;
	frame.sf_sc.sc_cs     = regs->tf_cs;
	frame.sf_sc.sc_eflags = regs->tf_eflags;
	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
	frame.sf_sc.sc_ss     = regs->tf_ss;
	frame.sf_sc.sc_err    = regs->tf_err;
	frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code);

	/* Mask words beyond the first go into sf_extramask. */
	for (i = 0; i < (LINUX_NSIG_WORDS-1); i++)
		frame.sf_extramask[i] = lmask.__bits[i+1];

	if (copyout(&frame, fp, sizeof(frame)) != 0) {
		/*
		 * Process has trashed its stack; give it an illegal
		 * instruction to halt it in its tracks.
		 */
		PROC_LOCK(p);
		sigexit(td, SIGILL);
	}

	/*
	 * Build context to run handler in.
	 */
	regs->tf_esp = (int)fp;
	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
	regs->tf_eflags &= ~(PSL_T | PSL_VM);
	regs->tf_cs = _ucodesel;
	regs->tf_ds = _udatasel;
	regs->tf_es = _udatasel;
	regs->tf_fs = _udatasel;
	regs->tf_ss = _udatasel;
	/* Return to the caller with the process lock held, as on entry. */
	PROC_LOCK(p);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by sendsig (above).
503 * Return to previous pc and psl as specified by 504 * context left by sendsig. Check carefully to 505 * make sure that the user has not modified the 506 * psl to gain improper privileges or to cause 507 * a machine fault. 508 */ 509 int 510 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 511 { 512 struct proc *p = td->td_proc; 513 struct l_sigframe frame; 514 struct trapframe *regs; 515 l_sigset_t lmask; 516 int eflags, i; 517 518 regs = td->td_frame; 519 520 #ifdef DEBUG 521 if (ldebug(sigreturn)) 522 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 523 #endif 524 /* 525 * The trampoline code hands us the sigframe. 526 * It is unsafe to keep track of it ourselves, in the event that a 527 * program jumps out of a signal handler. 528 */ 529 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 530 return (EFAULT); 531 532 /* 533 * Check for security violations. 534 */ 535 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 536 eflags = frame.sf_sc.sc_eflags; 537 /* 538 * XXX do allow users to change the privileged flag PSL_RF. The 539 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 540 * sometimes set it there too. tf_eflags is kept in the signal 541 * context during signal handling and there is no other place 542 * to remember it, so the PSL_RF bit may be corrupted by the 543 * signal handler without us knowing. Corruption of the PSL_RF 544 * bit at worst causes one more or one less debugger trap, so 545 * allowing it is fairly harmless. 546 */ 547 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 548 return(EINVAL); 549 550 /* 551 * Don't allow users to load a valid privileged %cs. Let the 552 * hardware check for invalid selectors, excess privilege in 553 * other selectors, invalid %eip's and invalid %esp's. 
554 */ 555 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 556 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 557 trapsignal(p, SIGBUS, T_PROTFLT); 558 return(EINVAL); 559 } 560 561 lmask.__bits[0] = frame.sf_sc.sc_mask; 562 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 563 lmask.__bits[i+1] = frame.sf_extramask[i]; 564 PROC_LOCK(p); 565 linux_to_bsd_sigset(&lmask, &p->p_sigmask); 566 SIG_CANTMASK(p->p_sigmask); 567 signotify(p); 568 PROC_UNLOCK(p); 569 570 /* 571 * Restore signal context. 572 */ 573 /* %gs was restored by the trampoline. */ 574 regs->tf_fs = frame.sf_sc.sc_fs; 575 regs->tf_es = frame.sf_sc.sc_es; 576 regs->tf_ds = frame.sf_sc.sc_ds; 577 regs->tf_edi = frame.sf_sc.sc_edi; 578 regs->tf_esi = frame.sf_sc.sc_esi; 579 regs->tf_ebp = frame.sf_sc.sc_ebp; 580 regs->tf_ebx = frame.sf_sc.sc_ebx; 581 regs->tf_edx = frame.sf_sc.sc_edx; 582 regs->tf_ecx = frame.sf_sc.sc_ecx; 583 regs->tf_eax = frame.sf_sc.sc_eax; 584 regs->tf_eip = frame.sf_sc.sc_eip; 585 regs->tf_cs = frame.sf_sc.sc_cs; 586 regs->tf_eflags = eflags; 587 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 588 regs->tf_ss = frame.sf_sc.sc_ss; 589 590 return (EJUSTRETURN); 591 } 592 593 /* 594 * System call to cleanup state after a signal 595 * has been taken. Reset signal mask and 596 * stack state from context left by rt_sendsig (above). 597 * Return to previous pc and psl as specified by 598 * context left by sendsig. Check carefully to 599 * make sure that the user has not modified the 600 * psl to gain improper privileges or to cause 601 * a machine fault. 
602 */ 603 int 604 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 605 { 606 struct proc *p = td->td_proc; 607 struct l_ucontext uc; 608 struct l_sigcontext *context; 609 l_stack_t *lss; 610 stack_t ss; 611 struct trapframe *regs; 612 int eflags; 613 614 regs = td->td_frame; 615 616 #ifdef DEBUG 617 if (ldebug(rt_sigreturn)) 618 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 619 #endif 620 /* 621 * The trampoline code hands us the ucontext. 622 * It is unsafe to keep track of it ourselves, in the event that a 623 * program jumps out of a signal handler. 624 */ 625 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 626 return (EFAULT); 627 628 context = &uc.uc_mcontext; 629 630 /* 631 * Check for security violations. 632 */ 633 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 634 eflags = context->sc_eflags; 635 /* 636 * XXX do allow users to change the privileged flag PSL_RF. The 637 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 638 * sometimes set it there too. tf_eflags is kept in the signal 639 * context during signal handling and there is no other place 640 * to remember it, so the PSL_RF bit may be corrupted by the 641 * signal handler without us knowing. Corruption of the PSL_RF 642 * bit at worst causes one more or one less debugger trap, so 643 * allowing it is fairly harmless. 644 */ 645 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 646 return(EINVAL); 647 648 /* 649 * Don't allow users to load a valid privileged %cs. Let the 650 * hardware check for invalid selectors, excess privilege in 651 * other selectors, invalid %eip's and invalid %esp's. 
652 */ 653 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 654 if (!CS_SECURE(context->sc_cs)) { 655 trapsignal(p, SIGBUS, T_PROTFLT); 656 return(EINVAL); 657 } 658 659 PROC_LOCK(p); 660 linux_to_bsd_sigset(&uc.uc_sigmask, &p->p_sigmask); 661 SIG_CANTMASK(p->p_sigmask); 662 signotify(p); 663 PROC_UNLOCK(p); 664 665 /* 666 * Restore signal context 667 */ 668 /* %gs was restored by the trampoline. */ 669 regs->tf_fs = context->sc_fs; 670 regs->tf_es = context->sc_es; 671 regs->tf_ds = context->sc_ds; 672 regs->tf_edi = context->sc_edi; 673 regs->tf_esi = context->sc_esi; 674 regs->tf_ebp = context->sc_ebp; 675 regs->tf_ebx = context->sc_ebx; 676 regs->tf_edx = context->sc_edx; 677 regs->tf_ecx = context->sc_ecx; 678 regs->tf_eax = context->sc_eax; 679 regs->tf_eip = context->sc_eip; 680 regs->tf_cs = context->sc_cs; 681 regs->tf_eflags = eflags; 682 regs->tf_esp = context->sc_esp_at_signal; 683 regs->tf_ss = context->sc_ss; 684 685 /* 686 * call sigaltstack & ignore results.. 687 */ 688 lss = &uc.uc_stack; 689 ss.ss_sp = lss->ss_sp; 690 ss.ss_size = lss->ss_size; 691 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 692 693 #ifdef DEBUG 694 if (ldebug(rt_sigreturn)) 695 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 696 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 697 #endif 698 (void)kern_sigaltstack(td, &ss, NULL); 699 700 return (EJUSTRETURN); 701 } 702 703 /* 704 * MPSAFE 705 */ 706 static void 707 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 708 { 709 args[0] = tf->tf_ebx; 710 args[1] = tf->tf_ecx; 711 args[2] = tf->tf_edx; 712 args[3] = tf->tf_esi; 713 args[4] = tf->tf_edi; 714 args[5] = tf->tf_ebp; /* Unconfirmed */ 715 *params = NULL; /* no copyin */ 716 } 717 718 719 720 /* 721 * Dump core, into a file named as described in the comments for 722 * expand_name(), unless the process was setuid/setgid. 
723 */ 724 static int 725 linux_aout_coredump(struct thread *td, struct vnode *vp, off_t limit) 726 { 727 struct proc *p = td->td_proc; 728 struct ucred *cred = td->td_ucred; 729 struct vmspace *vm = p->p_vmspace; 730 char *tempuser; 731 int error; 732 733 if (ctob((uarea_pages + kstack_pages) + 734 vm->vm_dsize + vm->vm_ssize) >= limit) 735 return (EFAULT); 736 tempuser = malloc(ctob(uarea_pages + kstack_pages), M_TEMP, 737 M_WAITOK | M_ZERO); 738 if (tempuser == NULL) 739 return (ENOMEM); 740 PROC_LOCK(p); 741 fill_kinfo_proc(p, &p->p_uarea->u_kproc); 742 PROC_UNLOCK(p); 743 bcopy(p->p_uarea, tempuser, sizeof(struct user)); 744 bcopy(td->td_frame, 745 tempuser + ctob(uarea_pages) + 746 ((caddr_t)td->td_frame - (caddr_t)td->td_kstack), 747 sizeof(struct trapframe)); 748 error = vn_rdwr(UIO_WRITE, vp, (caddr_t)tempuser, 749 ctob(uarea_pages + kstack_pages), 750 (off_t)0, UIO_SYSSPACE, IO_UNIT, cred, NOCRED, 751 (int *)NULL, td); 752 free(tempuser, M_TEMP); 753 if (error == 0) 754 error = vn_rdwr(UIO_WRITE, vp, vm->vm_daddr, 755 (int)ctob(vm->vm_dsize), 756 (off_t)ctob(uarea_pages + kstack_pages), UIO_USERSPACE, 757 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 758 if (error == 0) 759 error = vn_rdwr_inchunks(UIO_WRITE, vp, 760 (caddr_t)trunc_page(USRSTACK - ctob(vm->vm_ssize)), 761 round_page(ctob(vm->vm_ssize)), 762 (off_t)ctob(uarea_pages + kstack_pages) + 763 ctob(vm->vm_dsize), UIO_USERSPACE, 764 IO_UNIT | IO_DIRECT, cred, NOCRED, (int *) NULL, td); 765 return (error); 766 } 767 /* 768 * If a linux binary is exec'ing something, try this image activator 769 * first. We override standard shell script execution in order to 770 * be able to modify the interpreter path. We only do this if a linux 771 * binary is doing the exec, so we do not create an EXEC module for it. 
772 */ 773 static int exec_linux_imgact_try(struct image_params *iparams); 774 775 static int 776 exec_linux_imgact_try(struct image_params *imgp) 777 { 778 const char *head = (const char *)imgp->image_header; 779 int error = -1; 780 781 /* 782 * The interpreter for shell scripts run from a linux binary needs 783 * to be located in /compat/linux if possible in order to recursively 784 * maintain linux path emulation. 785 */ 786 if (((const short *)head)[0] == SHELLMAGIC) { 787 /* 788 * Run our normal shell image activator. If it succeeds attempt 789 * to use the alternate path for the interpreter. If an alternate 790 * path is found, use our stringspace to store it. 791 */ 792 if ((error = exec_shell_imgact(imgp)) == 0) { 793 char *rpath = NULL; 794 795 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 796 imgp->interpreter_name, &rpath, 0); 797 if (rpath != imgp->interpreter_name) { 798 int len = strlen(rpath) + 1; 799 800 if (len <= MAXSHELLCMDLEN) { 801 memcpy(imgp->interpreter_name, rpath, len); 802 } 803 free(rpath, M_TEMP); 804 } 805 } 806 } 807 return(error); 808 } 809 810 struct sysentvec linux_sysvec = { 811 LINUX_SYS_MAXSYSCALL, 812 linux_sysent, 813 0xff, 814 LINUX_SIGTBLSZ, 815 bsd_to_linux_signal, 816 ELAST + 1, 817 bsd_to_linux_errno, 818 translate_traps, 819 linux_fixup, 820 linux_sendsig, 821 linux_sigcode, 822 &linux_szsigcode, 823 linux_prepsyscall, 824 "Linux a.out", 825 linux_aout_coredump, 826 exec_linux_imgact_try, 827 LINUX_MINSIGSTKSZ, 828 PAGE_SIZE, 829 VM_MIN_ADDRESS, 830 VM_MAXUSER_ADDRESS, 831 USRSTACK, 832 PS_STRINGS, 833 VM_PROT_ALL, 834 exec_copyout_strings, 835 exec_setregs 836 }; 837 838 struct sysentvec elf_linux_sysvec = { 839 LINUX_SYS_MAXSYSCALL, 840 linux_sysent, 841 0xff, 842 LINUX_SIGTBLSZ, 843 bsd_to_linux_signal, 844 ELAST + 1, 845 bsd_to_linux_errno, 846 translate_traps, 847 elf_linux_fixup, 848 linux_sendsig, 849 linux_sigcode, 850 &linux_szsigcode, 851 linux_prepsyscall, 852 "Linux ELF", 853 elf32_coredump, 854 
exec_linux_imgact_try, 855 LINUX_MINSIGSTKSZ, 856 PAGE_SIZE, 857 VM_MIN_ADDRESS, 858 VM_MAXUSER_ADDRESS, 859 USRSTACK, 860 PS_STRINGS, 861 VM_PROT_ALL, 862 exec_copyout_strings, 863 exec_setregs 864 }; 865 866 static Elf32_Brandinfo linux_brand = { 867 ELFOSABI_LINUX, 868 EM_386, 869 "Linux", 870 "/compat/linux", 871 "/lib/ld-linux.so.1", 872 &elf_linux_sysvec 873 }; 874 875 static Elf32_Brandinfo linux_glibc2brand = { 876 ELFOSABI_LINUX, 877 EM_386, 878 "Linux", 879 "/compat/linux", 880 "/lib/ld-linux.so.2", 881 &elf_linux_sysvec 882 }; 883 884 Elf32_Brandinfo *linux_brandlist[] = { 885 &linux_brand, 886 &linux_glibc2brand, 887 NULL 888 }; 889 890 static int 891 linux_elf_modevent(module_t mod, int type, void *data) 892 { 893 Elf32_Brandinfo **brandinfo; 894 int error; 895 struct linux_ioctl_handler **lihp; 896 897 error = 0; 898 899 switch(type) { 900 case MOD_LOAD: 901 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 902 ++brandinfo) 903 if (elf32_insert_brand_entry(*brandinfo) < 0) 904 error = EINVAL; 905 if (error == 0) { 906 SET_FOREACH(lihp, linux_ioctl_handler_set) 907 linux_ioctl_register_handler(*lihp); 908 if (bootverbose) 909 printf("Linux ELF exec handler installed\n"); 910 } else 911 printf("cannot insert Linux ELF brand handler\n"); 912 break; 913 case MOD_UNLOAD: 914 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 915 ++brandinfo) 916 if (elf32_brand_inuse(*brandinfo)) 917 error = EBUSY; 918 if (error == 0) { 919 for (brandinfo = &linux_brandlist[0]; 920 *brandinfo != NULL; ++brandinfo) 921 if (elf32_remove_brand_entry(*brandinfo) < 0) 922 error = EINVAL; 923 } 924 if (error == 0) { 925 SET_FOREACH(lihp, linux_ioctl_handler_set) 926 linux_ioctl_unregister_handler(*lihp); 927 if (bootverbose) 928 printf("Linux ELF exec handler removed\n"); 929 linux_mib_destroy(); 930 } else 931 printf("Could not deinstall ELF interpreter entry\n"); 932 break; 933 default: 934 break; 935 } 936 return error; 937 } 938 939 static moduledata_t 
linux_elf_mod = { 940 "linuxelf", 941 linux_elf_modevent, 942 0 943 }; 944 945 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 946