1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S�ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 /* XXX we use functions that might not exist. */ 37 #include "opt_compat.h" 38 39 #ifndef COMPAT_43 40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 41 #endif 42 #ifndef COMPAT_IA32 43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!" 44 #endif 45 46 #define __ELF_WORD_SIZE 32 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/exec.h> 51 #include <sys/imgact.h> 52 #include <sys/imgact_elf.h> 53 #include <sys/kernel.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/module.h> 57 #include <sys/mutex.h> 58 #include <sys/proc.h> 59 #include <sys/resourcevar.h> 60 #include <sys/signalvar.h> 61 #include <sys/sysctl.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysent.h> 64 #include <sys/sysproto.h> 65 #include <sys/vnode.h> 66 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vm_map.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_param.h> 74 75 #include <machine/cpu.h> 76 #include <machine/md_var.h> 77 #include <machine/pcb.h> 78 #include <machine/specialreg.h> 79 80 #include <amd64/linux32/linux.h> 81 #include <amd64/linux32/linux32_proto.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 88 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 89 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 90 91 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 92 93 #define AUXARGS_ENTRY_32(pos, id, val) \ 94 do { \ 95 suword32(pos++, id); \ 96 suword32(pos++, val); \ 97 } while (0) 98 99 #if BYTE_ORDER == LITTLE_ENDIAN 100 #define SHELLMAGIC 0x2123 /* #! */ 101 #else 102 #define SHELLMAGIC 0x2321 103 #endif 104 105 /* 106 * Allow the sendsig functions to use the ldebug() facility 107 * even though they are not syscalls themselves. Map them 108 * to syscall 0. This is slightly less bogus than using 109 * ldebug(sigreturn). 110 */ 111 #define LINUX_SYS_linux_rt_sendsig 0 112 #define LINUX_SYS_linux_sendsig 0 113 114 extern char linux_sigcode[]; 115 extern int linux_szsigcode; 116 117 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 118 119 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 120 121 static int elf_linux_fixup(register_t **stack_base, 122 struct image_params *iparams); 123 static register_t *linux_copyout_strings(struct image_params *imgp); 124 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 125 caddr_t *params); 126 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 127 u_long code); 128 static void exec_linux_setregs(struct thread *td, u_long entry, 129 u_long stack, u_long ps_strings); 130 static void linux32_fixlimits(struct image_params *imgp); 131 132 /* 133 * Linux syscalls return negative errno's, we do positive and map them 134 */ 135 static int bsd_to_linux_errno[ELAST + 1] = { 136 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 137 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 138 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 139 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 140 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 141 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 142 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 143 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 144 -6, -6, -43, -42, -75, -6, -84 145 }; 146 147 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 148 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 149 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 150 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 151 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 152 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 153 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 154 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 155 0, LINUX_SIGUSR1, LINUX_SIGUSR2 156 }; 157 158 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 159 SIGHUP, SIGINT, SIGQUIT, SIGILL, 160 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 161 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 162 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 163 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 164 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 165 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 166 SIGIO, SIGURG, SIGSYS 167 }; 168 169 #define LINUX_T_UNKNOWN 255 170 static int _bsd_to_linux_trapcode[] = { 171 LINUX_T_UNKNOWN, /* 0 */ 172 6, /* 1 T_PRIVINFLT */ 173 LINUX_T_UNKNOWN, /* 2 */ 174 3, /* 3 T_BPTFLT */ 175 LINUX_T_UNKNOWN, /* 4 */ 176 LINUX_T_UNKNOWN, /* 5 */ 177 16, /* 6 T_ARITHTRAP */ 178 254, /* 7 T_ASTFLT */ 179 LINUX_T_UNKNOWN, /* 8 */ 180 13, /* 9 T_PROTFLT */ 181 1, /* 10 T_TRCTRAP */ 182 LINUX_T_UNKNOWN, /* 11 */ 183 14, /* 12 T_PAGEFLT */ 184 LINUX_T_UNKNOWN, /* 13 */ 185 17, /* 14 T_ALIGNFLT */ 186 LINUX_T_UNKNOWN, /* 15 */ 187 LINUX_T_UNKNOWN, /* 16 */ 188 LINUX_T_UNKNOWN, /* 17 */ 189 0, /* 18 T_DIVIDE */ 190 2, /* 19 T_NMI */ 191 4, /* 20 T_OFLOW */ 192 5, /* 21 T_BOUND */ 193 7, /* 22 T_DNA */ 194 8, /* 23 T_DOUBLEFLT */ 195 9, /* 24 T_FPOPFLT */ 196 10, /* 25 T_TSSFLT */ 197 11, /* 26 T_SEGNPFLT */ 198 12, /* 27 T_STKFLT */ 199 18, /* 28 T_MCHK */ 200 19, /* 29 T_XMMFLT */ 201 15 /* 30 T_RESERVED */ 202 }; 203 #define bsd_to_linux_trapcode(code) \ 204 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 205 _bsd_to_linux_trapcode[(code)]: \ 206 LINUX_T_UNKNOWN) 207 208 struct linux32_ps_strings { 209 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 210 int ps_nargvstr; /* the number of argument strings */ 211 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 212 int ps_nenvstr; /* the number of environment strings */ 213 }; 214 215 /* 216 * If FreeBSD & Linux have a difference of opinion about what a trap 217 * means, deal with it here. 218 * 219 * MPSAFE 220 */ 221 static int 222 translate_traps(int signal, int trap_code) 223 { 224 if (signal != SIGBUS) 225 return signal; 226 switch (trap_code) { 227 case T_PROTFLT: 228 case T_TSSFLT: 229 case T_DOUBLEFLT: 230 case T_PAGEFLT: 231 return SIGSEGV; 232 default: 233 return signal; 234 } 235 } 236 237 static int 238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 239 { 240 Elf32_Auxargs *args; 241 Elf32_Addr *base; 242 Elf32_Addr *pos; 243 244 KASSERT(curthread->td_proc == imgp->proc && 245 (curthread->td_proc->p_flag & P_SA) == 0, 246 ("unsafe elf_linux_fixup(), should be curproc")); 247 base = (Elf32_Addr *)*stack_base; 248 args = (Elf32_Auxargs *)imgp->auxargs; 249 pos = base + (imgp->args->argc + imgp->args->envc + 2); 250 251 if (args->trace) 252 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1); 253 if (args->execfd != -1) 254 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 255 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 256 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 257 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 258 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 259 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 260 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 261 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 262 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 263 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 264 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 265 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 266 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 267 268 free(imgp->auxargs, M_TEMP); 269 imgp->auxargs = NULL; 270 271 base--; 272 suword32(base, (uint32_t)imgp->args->argc); 273 *stack_base = (register_t *)base; 274 return 0; 275 } 276 277 extern int _ucodesel, _ucode32sel, _udatasel; 278 extern unsigned long linux_sznonrtsigcode; 279 280 static void 281 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 282 { 283 struct thread *td = curthread; 284 struct proc *p = td->td_proc; 285 struct sigacts *psp; 286 struct trapframe *regs; 287 struct l_rt_sigframe *fp, frame; 288 int oonstack; 289 290 PROC_LOCK_ASSERT(p, MA_OWNED); 291 psp = p->p_sigacts; 292 mtx_assert(&psp->ps_mtx, MA_OWNED); 293 regs = td->td_frame; 294 oonstack = sigonstack(regs->tf_rsp); 295 296 #ifdef DEBUG 297 if (ldebug(rt_sendsig)) 298 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 299 catcher, sig, (void*)mask, code); 300 #endif 301 /* 302 * Allocate space for the signal handler context. 303 */ 304 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 305 SIGISMEMBER(psp->ps_sigonstack, sig)) { 306 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 307 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 308 } else 309 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 310 mtx_unlock(&psp->ps_mtx); 311 312 /* 313 * Build the argument list for the signal handler. 314 */ 315 if (p->p_sysent->sv_sigtbl) 316 if (sig <= p->p_sysent->sv_sigsize) 317 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 318 319 bzero(&frame, sizeof(frame)); 320 321 frame.sf_handler = PTROUT(catcher); 322 frame.sf_sig = sig; 323 frame.sf_siginfo = PTROUT(&fp->sf_si); 324 frame.sf_ucontext = PTROUT(&fp->sf_sc); 325 326 /* Fill in POSIX parts */ 327 frame.sf_si.lsi_signo = sig; 328 frame.sf_si.lsi_code = code; 329 frame.sf_si.lsi_addr = PTROUT(regs->tf_err); 330 331 /* 332 * Build the signal context to be used by sigreturn. 333 */ 334 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 335 frame.sf_sc.uc_link = 0; /* XXX ??? */ 336 337 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 338 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 339 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 340 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 341 PROC_UNLOCK(p); 342 343 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 344 345 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 346 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 347 frame.sf_sc.uc_mcontext.sc_fs = rfs(); 348 __asm __volatile("movl %%es,%0" : 349 "=rm" (frame.sf_sc.uc_mcontext.sc_es)); 350 __asm __volatile("movl %%ds,%0" : 351 "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 352 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 353 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 354 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 355 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 356 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 357 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 358 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 359 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 360 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 361 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 362 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 363 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 364 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 365 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 366 367 #ifdef DEBUG 368 if (ldebug(rt_sendsig)) 369 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 370 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 371 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 372 #endif 373 374 if (copyout(&frame, fp, sizeof(frame)) != 0) { 375 /* 376 * Process has trashed its stack; give it an illegal 377 * instruction to halt it in its tracks. 378 */ 379 #ifdef DEBUG 380 if (ldebug(rt_sendsig)) 381 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 382 fp, oonstack); 383 #endif 384 PROC_LOCK(p); 385 sigexit(td, SIGILL); 386 } 387 388 /* 389 * Build context to run handler in. 390 */ 391 regs->tf_rsp = PTROUT(fp); 392 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 393 linux_sznonrtsigcode; 394 regs->tf_rflags &= ~PSL_T; 395 regs->tf_cs = _ucode32sel; 396 regs->tf_ss = _udatasel; 397 load_ds(_udatasel); 398 td->td_pcb->pcb_ds = _udatasel; 399 load_es(_udatasel); 400 td->td_pcb->pcb_es = _udatasel; 401 PROC_LOCK(p); 402 mtx_lock(&psp->ps_mtx); 403 } 404 405 406 /* 407 * Send an interrupt to process. 408 * 409 * Stack is set up to allow sigcode stored 410 * in u. to call routine, followed by kcall 411 * to sigreturn routine below. After sigreturn 412 * resets the signal mask, the stack, and the 413 * frame pointer, it returns to the user 414 * specified pc, psl. 415 */ 416 static void 417 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 418 { 419 struct thread *td = curthread; 420 struct proc *p = td->td_proc; 421 struct sigacts *psp; 422 struct trapframe *regs; 423 struct l_sigframe *fp, frame; 424 l_sigset_t lmask; 425 int oonstack, i; 426 427 PROC_LOCK_ASSERT(p, MA_OWNED); 428 psp = p->p_sigacts; 429 mtx_assert(&psp->ps_mtx, MA_OWNED); 430 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 431 /* Signal handler installed with SA_SIGINFO. */ 432 linux_rt_sendsig(catcher, sig, mask, code); 433 return; 434 } 435 436 regs = td->td_frame; 437 oonstack = sigonstack(regs->tf_rsp); 438 439 #ifdef DEBUG 440 if (ldebug(sendsig)) 441 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 442 catcher, sig, (void*)mask, code); 443 #endif 444 445 /* 446 * Allocate space for the signal handler context. 447 */ 448 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 449 SIGISMEMBER(psp->ps_sigonstack, sig)) { 450 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 451 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 452 } else 453 fp = (struct l_sigframe *)regs->tf_rsp - 1; 454 mtx_unlock(&psp->ps_mtx); 455 PROC_UNLOCK(p); 456 457 /* 458 * Build the argument list for the signal handler. 459 */ 460 if (p->p_sysent->sv_sigtbl) 461 if (sig <= p->p_sysent->sv_sigsize) 462 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 463 464 bzero(&frame, sizeof(frame)); 465 466 frame.sf_handler = PTROUT(catcher); 467 frame.sf_sig = sig; 468 469 bsd_to_linux_sigset(mask, &lmask); 470 471 /* 472 * Build the signal context to be used by sigreturn. 473 */ 474 frame.sf_sc.sc_mask = lmask.__bits[0]; 475 frame.sf_sc.sc_gs = rgs(); 476 frame.sf_sc.sc_fs = rfs(); 477 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); 478 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); 479 frame.sf_sc.sc_edi = regs->tf_rdi; 480 frame.sf_sc.sc_esi = regs->tf_rsi; 481 frame.sf_sc.sc_ebp = regs->tf_rbp; 482 frame.sf_sc.sc_ebx = regs->tf_rbx; 483 frame.sf_sc.sc_edx = regs->tf_rdx; 484 frame.sf_sc.sc_ecx = regs->tf_rcx; 485 frame.sf_sc.sc_eax = regs->tf_rax; 486 frame.sf_sc.sc_eip = regs->tf_rip; 487 frame.sf_sc.sc_cs = regs->tf_cs; 488 frame.sf_sc.sc_eflags = regs->tf_rflags; 489 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 490 frame.sf_sc.sc_ss = regs->tf_ss; 491 frame.sf_sc.sc_err = regs->tf_err; 492 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 493 494 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 495 frame.sf_extramask[i] = lmask.__bits[i+1]; 496 497 if (copyout(&frame, fp, sizeof(frame)) != 0) { 498 /* 499 * Process has trashed its stack; give it an illegal 500 * instruction to halt it in its tracks. 501 */ 502 PROC_LOCK(p); 503 sigexit(td, SIGILL); 504 } 505 506 /* 507 * Build context to run handler in. 508 */ 509 regs->tf_rsp = PTROUT(fp); 510 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 511 regs->tf_rflags &= ~PSL_T; 512 regs->tf_cs = _ucode32sel; 513 regs->tf_ss = _udatasel; 514 load_ds(_udatasel); 515 td->td_pcb->pcb_ds = _udatasel; 516 load_es(_udatasel); 517 td->td_pcb->pcb_es = _udatasel; 518 PROC_LOCK(p); 519 mtx_lock(&psp->ps_mtx); 520 } 521 522 /* 523 * System call to cleanup state after a signal 524 * has been taken. Reset signal mask and 525 * stack state from context left by sendsig (above). 526 * Return to previous pc and psl as specified by 527 * context left by sendsig. Check carefully to 528 * make sure that the user has not modified the 529 * psl to gain improper privileges or to cause 530 * a machine fault. 531 */ 532 int 533 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 534 { 535 struct proc *p = td->td_proc; 536 struct l_sigframe frame; 537 struct trapframe *regs; 538 l_sigset_t lmask; 539 int eflags, i; 540 541 regs = td->td_frame; 542 543 #ifdef DEBUG 544 if (ldebug(sigreturn)) 545 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 546 #endif 547 /* 548 * The trampoline code hands us the sigframe. 549 * It is unsafe to keep track of it ourselves, in the event that a 550 * program jumps out of a signal handler. 551 */ 552 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 553 return (EFAULT); 554 555 /* 556 * Check for security violations. 557 */ 558 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 559 eflags = frame.sf_sc.sc_eflags; 560 /* 561 * XXX do allow users to change the privileged flag PSL_RF. The 562 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 563 * sometimes set it there too. tf_eflags is kept in the signal 564 * context during signal handling and there is no other place 565 * to remember it, so the PSL_RF bit may be corrupted by the 566 * signal handler without us knowing. Corruption of the PSL_RF 567 * bit at worst causes one more or one less debugger trap, so 568 * allowing it is fairly harmless. 569 */ 570 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 571 return(EINVAL); 572 573 /* 574 * Don't allow users to load a valid privileged %cs. Let the 575 * hardware check for invalid selectors, excess privilege in 576 * other selectors, invalid %eip's and invalid %esp's. 577 */ 578 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 579 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 580 trapsignal(td, SIGBUS, T_PROTFLT); 581 return(EINVAL); 582 } 583 584 lmask.__bits[0] = frame.sf_sc.sc_mask; 585 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 586 lmask.__bits[i+1] = frame.sf_extramask[i]; 587 PROC_LOCK(p); 588 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 589 SIG_CANTMASK(td->td_sigmask); 590 signotify(td); 591 PROC_UNLOCK(p); 592 593 /* 594 * Restore signal context. 595 */ 596 /* Selectors were restored by the trampoline. */ 597 regs->tf_rdi = frame.sf_sc.sc_edi; 598 regs->tf_rsi = frame.sf_sc.sc_esi; 599 regs->tf_rbp = frame.sf_sc.sc_ebp; 600 regs->tf_rbx = frame.sf_sc.sc_ebx; 601 regs->tf_rdx = frame.sf_sc.sc_edx; 602 regs->tf_rcx = frame.sf_sc.sc_ecx; 603 regs->tf_rax = frame.sf_sc.sc_eax; 604 regs->tf_rip = frame.sf_sc.sc_eip; 605 regs->tf_cs = frame.sf_sc.sc_cs; 606 regs->tf_rflags = eflags; 607 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 608 regs->tf_ss = frame.sf_sc.sc_ss; 609 610 return (EJUSTRETURN); 611 } 612 613 /* 614 * System call to cleanup state after a signal 615 * has been taken. Reset signal mask and 616 * stack state from context left by rt_sendsig (above). 617 * Return to previous pc and psl as specified by 618 * context left by sendsig. Check carefully to 619 * make sure that the user has not modified the 620 * psl to gain improper privileges or to cause 621 * a machine fault. 622 */ 623 int 624 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 625 { 626 struct proc *p = td->td_proc; 627 struct l_ucontext uc; 628 struct l_sigcontext *context; 629 l_stack_t *lss; 630 stack_t ss; 631 struct trapframe *regs; 632 int eflags; 633 634 regs = td->td_frame; 635 636 #ifdef DEBUG 637 if (ldebug(rt_sigreturn)) 638 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 639 #endif 640 /* 641 * The trampoline code hands us the ucontext. 642 * It is unsafe to keep track of it ourselves, in the event that a 643 * program jumps out of a signal handler. 644 */ 645 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 646 return (EFAULT); 647 648 context = &uc.uc_mcontext; 649 650 /* 651 * Check for security violations. 652 */ 653 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 654 eflags = context->sc_eflags; 655 /* 656 * XXX do allow users to change the privileged flag PSL_RF. The 657 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 658 * sometimes set it there too. tf_eflags is kept in the signal 659 * context during signal handling and there is no other place 660 * to remember it, so the PSL_RF bit may be corrupted by the 661 * signal handler without us knowing. Corruption of the PSL_RF 662 * bit at worst causes one more or one less debugger trap, so 663 * allowing it is fairly harmless. 664 */ 665 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 666 return(EINVAL); 667 668 /* 669 * Don't allow users to load a valid privileged %cs. Let the 670 * hardware check for invalid selectors, excess privilege in 671 * other selectors, invalid %eip's and invalid %esp's. 672 */ 673 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 674 if (!CS_SECURE(context->sc_cs)) { 675 trapsignal(td, SIGBUS, T_PROTFLT); 676 return(EINVAL); 677 } 678 679 PROC_LOCK(p); 680 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 681 SIG_CANTMASK(td->td_sigmask); 682 signotify(td); 683 PROC_UNLOCK(p); 684 685 /* 686 * Restore signal context 687 */ 688 /* Selectors were restored by the trampoline. */ 689 regs->tf_rdi = context->sc_edi; 690 regs->tf_rsi = context->sc_esi; 691 regs->tf_rbp = context->sc_ebp; 692 regs->tf_rbx = context->sc_ebx; 693 regs->tf_rdx = context->sc_edx; 694 regs->tf_rcx = context->sc_ecx; 695 regs->tf_rax = context->sc_eax; 696 regs->tf_rip = context->sc_eip; 697 regs->tf_cs = context->sc_cs; 698 regs->tf_rflags = eflags; 699 regs->tf_rsp = context->sc_esp_at_signal; 700 regs->tf_ss = context->sc_ss; 701 702 /* 703 * call sigaltstack & ignore results.. 704 */ 705 lss = &uc.uc_stack; 706 ss.ss_sp = PTRIN(lss->ss_sp); 707 ss.ss_size = lss->ss_size; 708 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 709 710 #ifdef DEBUG 711 if (ldebug(rt_sigreturn)) 712 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 713 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 714 #endif 715 (void)kern_sigaltstack(td, &ss, NULL); 716 717 return (EJUSTRETURN); 718 } 719 720 /* 721 * MPSAFE 722 */ 723 static void 724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 725 { 726 args[0] = tf->tf_rbx; 727 args[1] = tf->tf_rcx; 728 args[2] = tf->tf_rdx; 729 args[3] = tf->tf_rsi; 730 args[4] = tf->tf_rdi; 731 args[5] = tf->tf_rbp; /* Unconfirmed */ 732 *params = NULL; /* no copyin */ 733 } 734 735 /* 736 * If a linux binary is exec'ing something, try this image activator 737 * first. We override standard shell script execution in order to 738 * be able to modify the interpreter path. We only do this if a linux 739 * binary is doing the exec, so we do not create an EXEC module for it. 740 */ 741 static int exec_linux_imgact_try(struct image_params *iparams); 742 743 static int 744 exec_linux_imgact_try(struct image_params *imgp) 745 { 746 const char *head = (const char *)imgp->image_header; 747 char *rpath; 748 int error = -1, len; 749 750 /* 751 * The interpreter for shell scripts run from a linux binary needs 752 * to be located in /compat/linux if possible in order to recursively 753 * maintain linux path emulation. 754 */ 755 if (((const short *)head)[0] == SHELLMAGIC) { 756 /* 757 * Run our normal shell image activator. If it succeeds attempt 758 * to use the alternate path for the interpreter. If an alternate 759 * path is found, use our stringspace to store it. 760 */ 761 if ((error = exec_shell_imgact(imgp)) == 0) { 762 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 763 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 764 if (rpath != NULL) { 765 len = strlen(rpath) + 1; 766 767 if (len <= MAXSHELLCMDLEN) { 768 memcpy(imgp->interpreter_name, rpath, len); 769 } 770 free(rpath, M_TEMP); 771 } 772 } 773 } 774 return(error); 775 } 776 777 /* 778 * Clear registers on exec 779 * XXX copied from ia32_signal.c. 780 */ 781 static void 782 exec_linux_setregs(td, entry, stack, ps_strings) 783 struct thread *td; 784 u_long entry; 785 u_long stack; 786 u_long ps_strings; 787 { 788 struct trapframe *regs = td->td_frame; 789 struct pcb *pcb = td->td_pcb; 790 791 wrmsr(MSR_FSBASE, 0); 792 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 793 pcb->pcb_fsbase = 0; 794 pcb->pcb_gsbase = 0; 795 load_ds(_udatasel); 796 load_es(_udatasel); 797 load_fs(_udatasel); 798 load_gs(0); 799 pcb->pcb_ds = _udatasel; 800 pcb->pcb_es = _udatasel; 801 pcb->pcb_fs = _udatasel; 802 pcb->pcb_gs = 0; 803 804 bzero((char *)regs, sizeof(struct trapframe)); 805 regs->tf_rip = entry; 806 regs->tf_rsp = stack; 807 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 808 regs->tf_ss = _udatasel; 809 regs->tf_cs = _ucode32sel; 810 regs->tf_rbx = ps_strings; 811 load_cr0(rcr0() | CR0_MP | CR0_TS); 812 813 /* Return via doreti so that we can change to a different %cs */ 814 pcb->pcb_flags |= PCB_FULLCTX; 815 td->td_retval[1] = 0; 816 } 817 818 /* 819 * XXX copied from ia32_sysvec.c. 820 */ 821 static register_t * 822 linux_copyout_strings(struct image_params *imgp) 823 { 824 int argc, envc; 825 u_int32_t *vectp; 826 char *stringp, *destp; 827 u_int32_t *stack_base; 828 struct linux32_ps_strings *arginfo; 829 int sigcodesz; 830 831 /* 832 * Calculate string base and vector table pointers. 833 * Also deal with signal trampoline code for this exec type. 834 */ 835 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 836 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode); 837 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE - 838 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 839 840 /* 841 * install sigcode 842 */ 843 if (sigcodesz) 844 copyout(imgp->proc->p_sysent->sv_sigcode, 845 ((caddr_t)arginfo - sigcodesz), szsigcode); 846 847 /* 848 * If we have a valid auxargs ptr, prepare some room 849 * on the stack. 850 */ 851 if (imgp->auxargs) { 852 /* 853 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 854 * lower compatibility. 855 */ 856 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 857 : (AT_COUNT * 2); 858 /* 859 * The '+ 2' is for the null pointers at the end of each of 860 * the arg and env vector sets,and imgp->auxarg_size is room 861 * for argument of Runtime loader. 862 */ 863 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + 864 imgp->auxarg_size) * sizeof(u_int32_t)); 865 866 } else 867 /* 868 * The '+ 2' is for the null pointers at the end of each of 869 * the arg and env vector sets 870 */ 871 vectp = (u_int32_t *) 872 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); 873 874 /* 875 * vectp also becomes our initial stack base 876 */ 877 stack_base = vectp; 878 879 stringp = imgp->args->begin_argv; 880 argc = imgp->args->argc; 881 envc = imgp->args->envc; 882 /* 883 * Copy out strings - arguments and environment. 884 */ 885 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 886 887 /* 888 * Fill in "ps_strings" struct for ps, w, etc. 889 */ 890 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); 891 suword32(&arginfo->ps_nargvstr, argc); 892 893 /* 894 * Fill in argument portion of vector table. 895 */ 896 for (; argc > 0; --argc) { 897 suword32(vectp++, (u_int32_t)(intptr_t)destp); 898 while (*stringp++ != 0) 899 destp++; 900 destp++; 901 } 902 903 /* a null vector table pointer separates the argp's from the envp's */ 904 suword32(vectp++, 0); 905 906 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); 907 suword32(&arginfo->ps_nenvstr, envc); 908 909 /* 910 * Fill in environment portion of vector table. 911 */ 912 for (; envc > 0; --envc) { 913 suword32(vectp++, (u_int32_t)(intptr_t)destp); 914 while (*stringp++ != 0) 915 destp++; 916 destp++; 917 } 918 919 /* end of vector table is a null pointer */ 920 suword32(vectp, 0); 921 922 return ((register_t *)stack_base); 923 } 924 925 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 926 "32-bit Linux emulation"); 927 928 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 929 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 930 &linux32_maxdsiz, 0, ""); 931 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 932 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 933 &linux32_maxssiz, 0, ""); 934 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 935 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 936 &linux32_maxvmem, 0, ""); 937 938 /* 939 * XXX copied from ia32_sysvec.c. 940 */ 941 static void 942 linux32_fixlimits(struct image_params *imgp) 943 { 944 struct proc *p = imgp->proc; 945 struct plimit *oldlim, *newlim; 946 947 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && 948 linux32_maxvmem == 0) 949 return; 950 newlim = lim_alloc(); 951 PROC_LOCK(p); 952 oldlim = p->p_limit; 953 lim_copy(newlim, oldlim); 954 if (linux32_maxdsiz != 0) { 955 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) 956 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; 957 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) 958 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; 959 } 960 if (linux32_maxssiz != 0) { 961 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) 962 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; 963 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) 964 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; 965 } 966 if (linux32_maxvmem != 0) { 967 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) 968 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; 969 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) 970 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; 971 } 972 p->p_limit = newlim; 973 PROC_UNLOCK(p); 974 lim_free(oldlim); 975 } 976 977 struct sysentvec elf_linux_sysvec = { 978 LINUX_SYS_MAXSYSCALL, 979 linux_sysent, 980 0xff, 981 LINUX_SIGTBLSZ, 982 bsd_to_linux_signal, 983 ELAST + 1, 984 bsd_to_linux_errno, 985 translate_traps, 986 elf_linux_fixup, 987 linux_sendsig, 988 linux_sigcode, 989 &linux_szsigcode, 990 linux_prepsyscall, 991 "Linux ELF32", 992 elf32_coredump, 993 exec_linux_imgact_try, 994 LINUX_MINSIGSTKSZ, 995 PAGE_SIZE, 996 VM_MIN_ADDRESS, 997 LINUX32_USRSTACK, 998 LINUX32_USRSTACK, 999 LINUX32_PS_STRINGS, 1000 VM_PROT_ALL, 1001 linux_copyout_strings, 1002 exec_linux_setregs, 1003 linux32_fixlimits 1004 }; 1005 1006 static Elf32_Brandinfo linux_brand = { 1007 ELFOSABI_LINUX, 1008 EM_386, 1009 "Linux", 1010 "/compat/linux", 1011 "/lib/ld-linux.so.1", 1012 &elf_linux_sysvec, 1013 NULL, 1014 }; 1015 1016 static Elf32_Brandinfo linux_glibc2brand = { 1017 ELFOSABI_LINUX, 1018 EM_386, 1019 "Linux", 1020 "/compat/linux", 1021 "/lib/ld-linux.so.2", 1022 &elf_linux_sysvec, 1023 NULL, 1024 }; 1025 1026 Elf32_Brandinfo *linux_brandlist[] = { 1027 &linux_brand, 1028 &linux_glibc2brand, 1029 NULL 1030 }; 1031 1032 static int 1033 linux_elf_modevent(module_t mod, int type, void *data) 1034 { 1035 Elf32_Brandinfo **brandinfo; 1036 int error; 1037 struct linux_ioctl_handler **lihp; 1038 1039 error = 0; 1040 1041 switch(type) { 1042 case MOD_LOAD: 1043 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1044 ++brandinfo) 1045 if (elf32_insert_brand_entry(*brandinfo) < 0) 1046 error = EINVAL; 1047 if (error == 0) { 1048 SET_FOREACH(lihp, linux_ioctl_handler_set) 1049 linux_ioctl_register_handler(*lihp); 1050 if (bootverbose) 1051 printf("Linux ELF exec handler installed\n"); 1052 } else 1053 printf("cannot insert Linux ELF brand handler\n"); 1054 break; 1055 case MOD_UNLOAD: 1056 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1057 ++brandinfo) 1058 if (elf32_brand_inuse(*brandinfo)) 1059 error = EBUSY; 1060 if (error == 0) { 1061 for (brandinfo = &linux_brandlist[0]; 1062 *brandinfo != NULL; ++brandinfo) 1063 if (elf32_remove_brand_entry(*brandinfo) < 0) 1064 error = EINVAL; 1065 } 1066 if (error == 0) { 1067 SET_FOREACH(lihp, linux_ioctl_handler_set) 1068 linux_ioctl_unregister_handler(*lihp); 1069 if (bootverbose) 1070 printf("Linux ELF exec handler removed\n"); 1071 linux_mib_destroy(); 1072 } else 1073 printf("Could not deinstall ELF interpreter entry\n"); 1074 break; 1075 default: 1076 break; 1077 } 1078 return error; 1079 } 1080 1081 static moduledata_t linux_elf_mod = { 1082 "linuxelf", 1083 linux_elf_modevent, 1084 0 1085 }; 1086 1087 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1088