1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S�ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 /* XXX we use functions that might not exist. */ 37 #include "opt_compat.h" 38 39 #ifndef COMPAT_43 40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 41 #endif 42 #ifndef COMPAT_IA32 43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!" 44 #endif 45 46 #define __ELF_WORD_SIZE 32 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/exec.h> 51 #include <sys/imgact.h> 52 #include <sys/imgact_elf.h> 53 #include <sys/kernel.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/module.h> 57 #include <sys/mutex.h> 58 #include <sys/proc.h> 59 #include <sys/resourcevar.h> 60 #include <sys/signalvar.h> 61 #include <sys/sysctl.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysent.h> 64 #include <sys/sysproto.h> 65 #include <sys/vnode.h> 66 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vm_map.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_param.h> 74 75 #include <machine/cpu.h> 76 #include <machine/md_var.h> 77 #include <machine/pcb.h> 78 #include <machine/specialreg.h> 79 80 #include <amd64/linux32/linux.h> 81 #include <amd64/linux32/linux32_proto.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 MODULE_DEPEND(linux, sysvmsg, 1, 1, 1); 88 MODULE_DEPEND(linux, sysvsem, 1, 1, 1); 89 MODULE_DEPEND(linux, sysvshm, 1, 1, 1); 90 91 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 92 93 #define AUXARGS_ENTRY_32(pos, id, val) \ 94 do { \ 95 suword32(pos++, id); \ 96 suword32(pos++, val); \ 97 } while (0) 98 99 #if BYTE_ORDER == LITTLE_ENDIAN 100 #define SHELLMAGIC 0x2123 /* #! */ 101 #else 102 #define SHELLMAGIC 0x2321 103 #endif 104 105 /* 106 * Allow the sendsig functions to use the ldebug() facility 107 * even though they are not syscalls themselves. Map them 108 * to syscall 0. This is slightly less bogus than using 109 * ldebug(sigreturn). 110 */ 111 #define LINUX_SYS_linux_rt_sendsig 0 112 #define LINUX_SYS_linux_sendsig 0 113 114 extern char linux_sigcode[]; 115 extern int linux_szsigcode; 116 117 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 118 119 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 120 121 static int elf_linux_fixup(register_t **stack_base, 122 struct image_params *iparams); 123 static register_t *linux_copyout_strings(struct image_params *imgp); 124 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 125 caddr_t *params); 126 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 127 u_long code); 128 static void exec_linux_setregs(struct thread *td, u_long entry, 129 u_long stack, u_long ps_strings); 130 static void linux32_fixlimits(struct image_params *imgp); 131 132 /* 133 * Linux syscalls return negative errno's, we do positive and map them 134 */ 135 static int bsd_to_linux_errno[ELAST + 1] = { 136 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 137 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 138 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 139 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 140 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 141 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 142 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 143 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 144 -6, -6, -43, -42, -75, -6, -84 145 }; 146 147 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 148 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 149 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 150 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 151 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 152 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 153 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 154 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 155 0, LINUX_SIGUSR1, LINUX_SIGUSR2 156 }; 157 158 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 159 SIGHUP, SIGINT, SIGQUIT, SIGILL, 160 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 161 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 162 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 163 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 164 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 165 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 166 SIGIO, SIGURG, SIGSYS 167 }; 168 169 #define LINUX_T_UNKNOWN 255 170 static int _bsd_to_linux_trapcode[] = { 171 LINUX_T_UNKNOWN, /* 0 */ 172 6, /* 1 T_PRIVINFLT */ 173 LINUX_T_UNKNOWN, /* 2 */ 174 3, /* 3 T_BPTFLT */ 175 LINUX_T_UNKNOWN, /* 4 */ 176 LINUX_T_UNKNOWN, /* 5 */ 177 16, /* 6 T_ARITHTRAP */ 178 254, /* 7 T_ASTFLT */ 179 LINUX_T_UNKNOWN, /* 8 */ 180 13, /* 9 T_PROTFLT */ 181 1, /* 10 T_TRCTRAP */ 182 LINUX_T_UNKNOWN, /* 11 */ 183 14, /* 12 T_PAGEFLT */ 184 LINUX_T_UNKNOWN, /* 13 */ 185 17, /* 14 T_ALIGNFLT */ 186 LINUX_T_UNKNOWN, /* 15 */ 187 LINUX_T_UNKNOWN, /* 16 */ 188 LINUX_T_UNKNOWN, /* 17 */ 189 0, /* 18 T_DIVIDE */ 190 2, /* 19 T_NMI */ 191 4, /* 20 T_OFLOW */ 192 5, /* 21 T_BOUND */ 193 7, /* 22 T_DNA */ 194 8, /* 23 T_DOUBLEFLT */ 195 9, /* 24 T_FPOPFLT */ 196 10, /* 25 T_TSSFLT */ 197 11, /* 26 T_SEGNPFLT */ 198 12, /* 27 T_STKFLT */ 199 18, /* 28 T_MCHK */ 200 19, /* 29 T_XMMFLT */ 201 15 /* 30 T_RESERVED */ 202 }; 203 #define bsd_to_linux_trapcode(code) \ 204 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 205 _bsd_to_linux_trapcode[(code)]: \ 206 LINUX_T_UNKNOWN) 207 208 struct linux32_ps_strings { 209 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 210 int ps_nargvstr; /* the number of argument strings */ 211 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 212 int ps_nenvstr; /* the number of environment strings */ 213 }; 214 215 /* 216 * If FreeBSD & Linux have a difference of opinion about what a trap 217 * means, deal with it here. 218 * 219 * MPSAFE 220 */ 221 static int 222 translate_traps(int signal, int trap_code) 223 { 224 if (signal != SIGBUS) 225 return signal; 226 switch (trap_code) { 227 case T_PROTFLT: 228 case T_TSSFLT: 229 case T_DOUBLEFLT: 230 case T_PAGEFLT: 231 return SIGSEGV; 232 default: 233 return signal; 234 } 235 } 236 237 static int 238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 239 { 240 Elf32_Auxargs *args; 241 Elf32_Addr *base; 242 Elf32_Addr *pos; 243 244 KASSERT(curthread->td_proc == imgp->proc && 245 (curthread->td_proc->p_flag & P_SA) == 0, 246 ("unsafe elf_linux_fixup(), should be curproc")); 247 base = (Elf32_Addr *)*stack_base; 248 args = (Elf32_Auxargs *)imgp->auxargs; 249 pos = base + (imgp->args->argc + imgp->args->envc + 2); 250 251 if (args->trace) 252 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1); 253 if (args->execfd != -1) 254 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 255 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 256 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 257 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 258 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 259 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 260 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 261 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 262 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 263 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 264 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 265 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 266 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 267 268 free(imgp->auxargs, M_TEMP); 269 imgp->auxargs = NULL; 270 271 base--; 272 suword32(base, (uint32_t)imgp->args->argc); 273 *stack_base = (register_t *)base; 274 return 0; 275 } 276 277 extern int _ucodesel, _ucode32sel, _udatasel; 278 extern unsigned long linux_sznonrtsigcode; 279 280 static void 281 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 282 { 283 struct thread *td = curthread; 284 struct proc *p = td->td_proc; 285 struct sigacts *psp; 286 struct trapframe *regs; 287 struct l_rt_sigframe *fp, frame; 288 int oonstack; 289 290 PROC_LOCK_ASSERT(p, MA_OWNED); 291 psp = p->p_sigacts; 292 mtx_assert(&psp->ps_mtx, MA_OWNED); 293 regs = td->td_frame; 294 oonstack = sigonstack(regs->tf_rsp); 295 296 #ifdef DEBUG 297 if (ldebug(rt_sendsig)) 298 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 299 catcher, sig, (void*)mask, code); 300 #endif 301 /* 302 * Allocate space for the signal handler context. 303 */ 304 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 305 SIGISMEMBER(psp->ps_sigonstack, sig)) { 306 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 307 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 308 } else 309 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 310 mtx_unlock(&psp->ps_mtx); 311 312 /* 313 * Build the argument list for the signal handler. 314 */ 315 if (p->p_sysent->sv_sigtbl) 316 if (sig <= p->p_sysent->sv_sigsize) 317 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 318 319 bzero(&frame, sizeof(frame)); 320 321 frame.sf_handler = PTROUT(catcher); 322 frame.sf_sig = sig; 323 frame.sf_siginfo = PTROUT(&fp->sf_si); 324 frame.sf_ucontext = PTROUT(&fp->sf_sc); 325 326 /* Fill in POSIX parts */ 327 frame.sf_si.lsi_signo = sig; 328 frame.sf_si.lsi_code = code; 329 frame.sf_si.lsi_addr = PTROUT(regs->tf_err); 330 331 /* 332 * Build the signal context to be used by sigreturn. 333 */ 334 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 335 frame.sf_sc.uc_link = 0; /* XXX ??? */ 336 337 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 338 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 339 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 340 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 341 PROC_UNLOCK(p); 342 343 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 344 345 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 346 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 347 frame.sf_sc.uc_mcontext.sc_fs = rfs(); 348 __asm __volatile("movl %%es,%0" : 349 "=rm" (frame.sf_sc.uc_mcontext.sc_es)); 350 __asm __volatile("movl %%ds,%0" : 351 "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 352 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 353 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 354 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 355 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 356 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 357 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 358 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 359 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 360 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 361 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 362 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 363 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 364 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 365 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 366 367 #ifdef DEBUG 368 if (ldebug(rt_sendsig)) 369 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 370 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 371 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 372 #endif 373 374 if (copyout(&frame, fp, sizeof(frame)) != 0) { 375 /* 376 * Process has trashed its stack; give it an illegal 377 * instruction to halt it in its tracks. 378 */ 379 #ifdef DEBUG 380 if (ldebug(rt_sendsig)) 381 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 382 fp, oonstack); 383 #endif 384 PROC_LOCK(p); 385 sigexit(td, SIGILL); 386 } 387 388 /* 389 * Build context to run handler in. 390 */ 391 regs->tf_rsp = PTROUT(fp); 392 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 393 linux_sznonrtsigcode; 394 regs->tf_rflags &= ~PSL_T; 395 regs->tf_cs = _ucode32sel; 396 regs->tf_ss = _udatasel; 397 load_ds(_udatasel); 398 td->td_pcb->pcb_ds = _udatasel; 399 load_es(_udatasel); 400 td->td_pcb->pcb_es = _udatasel; 401 PROC_LOCK(p); 402 mtx_lock(&psp->ps_mtx); 403 } 404 405 406 /* 407 * Send an interrupt to process. 408 * 409 * Stack is set up to allow sigcode stored 410 * in u. to call routine, followed by kcall 411 * to sigreturn routine below. After sigreturn 412 * resets the signal mask, the stack, and the 413 * frame pointer, it returns to the user 414 * specified pc, psl. 415 */ 416 static void 417 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 418 { 419 struct thread *td = curthread; 420 struct proc *p = td->td_proc; 421 struct sigacts *psp; 422 struct trapframe *regs; 423 struct l_sigframe *fp, frame; 424 l_sigset_t lmask; 425 int oonstack, i; 426 427 PROC_LOCK_ASSERT(p, MA_OWNED); 428 psp = p->p_sigacts; 429 mtx_assert(&psp->ps_mtx, MA_OWNED); 430 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 431 /* Signal handler installed with SA_SIGINFO. */ 432 linux_rt_sendsig(catcher, sig, mask, code); 433 return; 434 } 435 436 regs = td->td_frame; 437 oonstack = sigonstack(regs->tf_rsp); 438 439 #ifdef DEBUG 440 if (ldebug(sendsig)) 441 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 442 catcher, sig, (void*)mask, code); 443 #endif 444 445 /* 446 * Allocate space for the signal handler context. 447 */ 448 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 449 SIGISMEMBER(psp->ps_sigonstack, sig)) { 450 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 451 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 452 } else 453 fp = (struct l_sigframe *)regs->tf_rsp - 1; 454 mtx_unlock(&psp->ps_mtx); 455 PROC_UNLOCK(p); 456 457 /* 458 * Build the argument list for the signal handler. 459 */ 460 if (p->p_sysent->sv_sigtbl) 461 if (sig <= p->p_sysent->sv_sigsize) 462 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 463 464 bzero(&frame, sizeof(frame)); 465 466 frame.sf_handler = PTROUT(catcher); 467 frame.sf_sig = sig; 468 469 bsd_to_linux_sigset(mask, &lmask); 470 471 /* 472 * Build the signal context to be used by sigreturn. 473 */ 474 frame.sf_sc.sc_mask = lmask.__bits[0]; 475 frame.sf_sc.sc_gs = rgs(); 476 frame.sf_sc.sc_fs = rfs(); 477 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); 478 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); 479 frame.sf_sc.sc_edi = regs->tf_rdi; 480 frame.sf_sc.sc_esi = regs->tf_rsi; 481 frame.sf_sc.sc_ebp = regs->tf_rbp; 482 frame.sf_sc.sc_ebx = regs->tf_rbx; 483 frame.sf_sc.sc_edx = regs->tf_rdx; 484 frame.sf_sc.sc_ecx = regs->tf_rcx; 485 frame.sf_sc.sc_eax = regs->tf_rax; 486 frame.sf_sc.sc_eip = regs->tf_rip; 487 frame.sf_sc.sc_cs = regs->tf_cs; 488 frame.sf_sc.sc_eflags = regs->tf_rflags; 489 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 490 frame.sf_sc.sc_ss = regs->tf_ss; 491 frame.sf_sc.sc_err = regs->tf_err; 492 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 493 494 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 495 frame.sf_extramask[i] = lmask.__bits[i+1]; 496 497 if (copyout(&frame, fp, sizeof(frame)) != 0) { 498 /* 499 * Process has trashed its stack; give it an illegal 500 * instruction to halt it in its tracks. 501 */ 502 PROC_LOCK(p); 503 sigexit(td, SIGILL); 504 } 505 506 /* 507 * Build context to run handler in. 508 */ 509 regs->tf_rsp = PTROUT(fp); 510 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 511 regs->tf_rflags &= ~PSL_T; 512 regs->tf_cs = _ucode32sel; 513 regs->tf_ss = _udatasel; 514 load_ds(_udatasel); 515 td->td_pcb->pcb_ds = _udatasel; 516 load_es(_udatasel); 517 td->td_pcb->pcb_es = _udatasel; 518 PROC_LOCK(p); 519 mtx_lock(&psp->ps_mtx); 520 } 521 522 /* 523 * System call to cleanup state after a signal 524 * has been taken. Reset signal mask and 525 * stack state from context left by sendsig (above). 526 * Return to previous pc and psl as specified by 527 * context left by sendsig. Check carefully to 528 * make sure that the user has not modified the 529 * psl to gain improper privileges or to cause 530 * a machine fault. 531 */ 532 int 533 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 534 { 535 struct proc *p = td->td_proc; 536 struct l_sigframe frame; 537 struct trapframe *regs; 538 l_sigset_t lmask; 539 int eflags, i; 540 541 regs = td->td_frame; 542 543 #ifdef DEBUG 544 if (ldebug(sigreturn)) 545 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 546 #endif 547 /* 548 * The trampoline code hands us the sigframe. 549 * It is unsafe to keep track of it ourselves, in the event that a 550 * program jumps out of a signal handler. 551 */ 552 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 553 return (EFAULT); 554 555 /* 556 * Check for security violations. 557 */ 558 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 559 eflags = frame.sf_sc.sc_eflags; 560 /* 561 * XXX do allow users to change the privileged flag PSL_RF. The 562 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 563 * sometimes set it there too. tf_eflags is kept in the signal 564 * context during signal handling and there is no other place 565 * to remember it, so the PSL_RF bit may be corrupted by the 566 * signal handler without us knowing. Corruption of the PSL_RF 567 * bit at worst causes one more or one less debugger trap, so 568 * allowing it is fairly harmless. 569 */ 570 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 571 return(EINVAL); 572 573 /* 574 * Don't allow users to load a valid privileged %cs. Let the 575 * hardware check for invalid selectors, excess privilege in 576 * other selectors, invalid %eip's and invalid %esp's. 577 */ 578 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 579 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 580 trapsignal(td, SIGBUS, T_PROTFLT); 581 return(EINVAL); 582 } 583 584 lmask.__bits[0] = frame.sf_sc.sc_mask; 585 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 586 lmask.__bits[i+1] = frame.sf_extramask[i]; 587 PROC_LOCK(p); 588 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 589 SIG_CANTMASK(td->td_sigmask); 590 signotify(td); 591 PROC_UNLOCK(p); 592 593 /* 594 * Restore signal context. 595 */ 596 /* Selectors were restored by the trampoline. */ 597 regs->tf_rdi = frame.sf_sc.sc_edi; 598 regs->tf_rsi = frame.sf_sc.sc_esi; 599 regs->tf_rbp = frame.sf_sc.sc_ebp; 600 regs->tf_rbx = frame.sf_sc.sc_ebx; 601 regs->tf_rdx = frame.sf_sc.sc_edx; 602 regs->tf_rcx = frame.sf_sc.sc_ecx; 603 regs->tf_rax = frame.sf_sc.sc_eax; 604 regs->tf_rip = frame.sf_sc.sc_eip; 605 regs->tf_cs = frame.sf_sc.sc_cs; 606 regs->tf_rflags = eflags; 607 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 608 regs->tf_ss = frame.sf_sc.sc_ss; 609 610 return (EJUSTRETURN); 611 } 612 613 /* 614 * System call to cleanup state after a signal 615 * has been taken. Reset signal mask and 616 * stack state from context left by rt_sendsig (above). 617 * Return to previous pc and psl as specified by 618 * context left by sendsig. Check carefully to 619 * make sure that the user has not modified the 620 * psl to gain improper privileges or to cause 621 * a machine fault. 622 */ 623 int 624 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 625 { 626 struct proc *p = td->td_proc; 627 struct l_ucontext uc; 628 struct l_sigcontext *context; 629 l_stack_t *lss; 630 stack_t ss; 631 struct trapframe *regs; 632 int eflags; 633 634 regs = td->td_frame; 635 636 #ifdef DEBUG 637 if (ldebug(rt_sigreturn)) 638 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 639 #endif 640 /* 641 * The trampoline code hands us the ucontext. 642 * It is unsafe to keep track of it ourselves, in the event that a 643 * program jumps out of a signal handler. 644 */ 645 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 646 return (EFAULT); 647 648 context = &uc.uc_mcontext; 649 650 /* 651 * Check for security violations. 652 */ 653 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 654 eflags = context->sc_eflags; 655 /* 656 * XXX do allow users to change the privileged flag PSL_RF. The 657 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 658 * sometimes set it there too. tf_eflags is kept in the signal 659 * context during signal handling and there is no other place 660 * to remember it, so the PSL_RF bit may be corrupted by the 661 * signal handler without us knowing. Corruption of the PSL_RF 662 * bit at worst causes one more or one less debugger trap, so 663 * allowing it is fairly harmless. 664 */ 665 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 666 return(EINVAL); 667 668 /* 669 * Don't allow users to load a valid privileged %cs. Let the 670 * hardware check for invalid selectors, excess privilege in 671 * other selectors, invalid %eip's and invalid %esp's. 672 */ 673 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 674 if (!CS_SECURE(context->sc_cs)) { 675 trapsignal(td, SIGBUS, T_PROTFLT); 676 return(EINVAL); 677 } 678 679 PROC_LOCK(p); 680 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 681 SIG_CANTMASK(td->td_sigmask); 682 signotify(td); 683 PROC_UNLOCK(p); 684 685 /* 686 * Restore signal context 687 */ 688 /* Selectors were restored by the trampoline. */ 689 regs->tf_rdi = context->sc_edi; 690 regs->tf_rsi = context->sc_esi; 691 regs->tf_rbp = context->sc_ebp; 692 regs->tf_rbx = context->sc_ebx; 693 regs->tf_rdx = context->sc_edx; 694 regs->tf_rcx = context->sc_ecx; 695 regs->tf_rax = context->sc_eax; 696 regs->tf_rip = context->sc_eip; 697 regs->tf_cs = context->sc_cs; 698 regs->tf_rflags = eflags; 699 regs->tf_rsp = context->sc_esp_at_signal; 700 regs->tf_ss = context->sc_ss; 701 702 /* 703 * call sigaltstack & ignore results.. 704 */ 705 lss = &uc.uc_stack; 706 ss.ss_sp = PTRIN(lss->ss_sp); 707 ss.ss_size = lss->ss_size; 708 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 709 710 #ifdef DEBUG 711 if (ldebug(rt_sigreturn)) 712 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 713 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 714 #endif 715 (void)kern_sigaltstack(td, &ss, NULL); 716 717 return (EJUSTRETURN); 718 } 719 720 /* 721 * MPSAFE 722 */ 723 static void 724 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 725 { 726 args[0] = tf->tf_rbx; 727 args[1] = tf->tf_rcx; 728 args[2] = tf->tf_rdx; 729 args[3] = tf->tf_rsi; 730 args[4] = tf->tf_rdi; 731 args[5] = tf->tf_rbp; /* Unconfirmed */ 732 *params = NULL; /* no copyin */ 733 } 734 735 /* 736 * If a linux binary is exec'ing something, try this image activator 737 * first. We override standard shell script execution in order to 738 * be able to modify the interpreter path. We only do this if a linux 739 * binary is doing the exec, so we do not create an EXEC module for it. 740 */ 741 static int exec_linux_imgact_try(struct image_params *iparams); 742 743 static int 744 exec_linux_imgact_try(struct image_params *imgp) 745 { 746 const char *head = (const char *)imgp->image_header; 747 int error = -1; 748 749 /* 750 * The interpreter for shell scripts run from a linux binary needs 751 * to be located in /compat/linux if possible in order to recursively 752 * maintain linux path emulation. 753 */ 754 if (((const short *)head)[0] == SHELLMAGIC) { 755 /* 756 * Run our normal shell image activator. If it succeeds attempt 757 * to use the alternate path for the interpreter. If an alternate 758 * path is found, use our stringspace to store it. 759 */ 760 if ((error = exec_shell_imgact(imgp)) == 0) { 761 char *rpath = NULL; 762 763 linux_emul_find(FIRST_THREAD_IN_PROC(imgp->proc), NULL, 764 imgp->interpreter_name, &rpath, 0); 765 if (rpath != imgp->interpreter_name) { 766 int len = strlen(rpath) + 1; 767 768 if (len <= MAXSHELLCMDLEN) { 769 memcpy(imgp->interpreter_name, rpath, len); 770 } 771 free(rpath, M_TEMP); 772 } 773 } 774 } 775 return(error); 776 } 777 778 /* 779 * Clear registers on exec 780 * XXX copied from ia32_signal.c. 781 */ 782 static void 783 exec_linux_setregs(td, entry, stack, ps_strings) 784 struct thread *td; 785 u_long entry; 786 u_long stack; 787 u_long ps_strings; 788 { 789 struct trapframe *regs = td->td_frame; 790 struct pcb *pcb = td->td_pcb; 791 792 wrmsr(MSR_FSBASE, 0); 793 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 794 pcb->pcb_fsbase = 0; 795 pcb->pcb_gsbase = 0; 796 load_ds(_udatasel); 797 load_es(_udatasel); 798 load_fs(_udatasel); 799 load_gs(0); 800 pcb->pcb_ds = _udatasel; 801 pcb->pcb_es = _udatasel; 802 pcb->pcb_fs = _udatasel; 803 pcb->pcb_gs = 0; 804 805 bzero((char *)regs, sizeof(struct trapframe)); 806 regs->tf_rip = entry; 807 regs->tf_rsp = stack; 808 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 809 regs->tf_ss = _udatasel; 810 regs->tf_cs = _ucode32sel; 811 regs->tf_rbx = ps_strings; 812 load_cr0(rcr0() | CR0_MP | CR0_TS); 813 814 /* Return via doreti so that we can change to a different %cs */ 815 pcb->pcb_flags |= PCB_FULLCTX; 816 td->td_retval[1] = 0; 817 } 818 819 /* 820 * XXX copied from ia32_sysvec.c. 821 */ 822 static register_t * 823 linux_copyout_strings(struct image_params *imgp) 824 { 825 int argc, envc; 826 u_int32_t *vectp; 827 char *stringp, *destp; 828 u_int32_t *stack_base; 829 struct linux32_ps_strings *arginfo; 830 int sigcodesz; 831 832 /* 833 * Calculate string base and vector table pointers. 834 * Also deal with signal trampoline code for this exec type. 835 */ 836 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 837 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode); 838 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE - 839 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 840 841 /* 842 * install sigcode 843 */ 844 if (sigcodesz) 845 copyout(imgp->proc->p_sysent->sv_sigcode, 846 ((caddr_t)arginfo - sigcodesz), szsigcode); 847 848 /* 849 * If we have a valid auxargs ptr, prepare some room 850 * on the stack. 851 */ 852 if (imgp->auxargs) { 853 /* 854 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 855 * lower compatibility. 856 */ 857 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 858 : (AT_COUNT * 2); 859 /* 860 * The '+ 2' is for the null pointers at the end of each of 861 * the arg and env vector sets,and imgp->auxarg_size is room 862 * for argument of Runtime loader. 863 */ 864 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + 865 imgp->auxarg_size) * sizeof(u_int32_t)); 866 867 } else 868 /* 869 * The '+ 2' is for the null pointers at the end of each of 870 * the arg and env vector sets 871 */ 872 vectp = (u_int32_t *) 873 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); 874 875 /* 876 * vectp also becomes our initial stack base 877 */ 878 stack_base = vectp; 879 880 stringp = imgp->args->begin_argv; 881 argc = imgp->args->argc; 882 envc = imgp->args->envc; 883 /* 884 * Copy out strings - arguments and environment. 885 */ 886 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 887 888 /* 889 * Fill in "ps_strings" struct for ps, w, etc. 890 */ 891 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); 892 suword32(&arginfo->ps_nargvstr, argc); 893 894 /* 895 * Fill in argument portion of vector table. 896 */ 897 for (; argc > 0; --argc) { 898 suword32(vectp++, (u_int32_t)(intptr_t)destp); 899 while (*stringp++ != 0) 900 destp++; 901 destp++; 902 } 903 904 /* a null vector table pointer separates the argp's from the envp's */ 905 suword32(vectp++, 0); 906 907 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); 908 suword32(&arginfo->ps_nenvstr, envc); 909 910 /* 911 * Fill in environment portion of vector table. 912 */ 913 for (; envc > 0; --envc) { 914 suword32(vectp++, (u_int32_t)(intptr_t)destp); 915 while (*stringp++ != 0) 916 destp++; 917 destp++; 918 } 919 920 /* end of vector table is a null pointer */ 921 suword32(vectp, 0); 922 923 return ((register_t *)stack_base); 924 } 925 926 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 927 "32-bit Linux emulation"); 928 929 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 930 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 931 &linux32_maxdsiz, 0, ""); 932 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 933 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 934 &linux32_maxssiz, 0, ""); 935 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 936 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 937 &linux32_maxvmem, 0, ""); 938 939 /* 940 * XXX copied from ia32_sysvec.c. 941 */ 942 static void 943 linux32_fixlimits(struct image_params *imgp) 944 { 945 struct proc *p = imgp->proc; 946 struct plimit *oldlim, *newlim; 947 948 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && 949 linux32_maxvmem == 0) 950 return; 951 newlim = lim_alloc(); 952 PROC_LOCK(p); 953 oldlim = p->p_limit; 954 lim_copy(newlim, oldlim); 955 if (linux32_maxdsiz != 0) { 956 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) 957 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; 958 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) 959 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; 960 } 961 if (linux32_maxssiz != 0) { 962 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) 963 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; 964 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) 965 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; 966 } 967 if (linux32_maxvmem != 0) { 968 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) 969 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; 970 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) 971 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; 972 } 973 p->p_limit = newlim; 974 PROC_UNLOCK(p); 975 lim_free(oldlim); 976 } 977 978 struct sysentvec elf_linux_sysvec = { 979 LINUX_SYS_MAXSYSCALL, 980 linux_sysent, 981 0xff, 982 LINUX_SIGTBLSZ, 983 bsd_to_linux_signal, 984 ELAST + 1, 985 bsd_to_linux_errno, 986 translate_traps, 987 elf_linux_fixup, 988 linux_sendsig, 989 linux_sigcode, 990 &linux_szsigcode, 991 linux_prepsyscall, 992 "Linux ELF32", 993 elf32_coredump, 994 exec_linux_imgact_try, 995 LINUX_MINSIGSTKSZ, 996 PAGE_SIZE, 997 VM_MIN_ADDRESS, 998 LINUX32_USRSTACK, 999 LINUX32_USRSTACK, 1000 LINUX32_PS_STRINGS, 1001 VM_PROT_ALL, 1002 linux_copyout_strings, 1003 exec_linux_setregs, 1004 linux32_fixlimits 1005 }; 1006 1007 static Elf32_Brandinfo linux_brand = { 1008 ELFOSABI_LINUX, 1009 EM_386, 1010 "Linux", 1011 "/compat/linux", 1012 "/lib/ld-linux.so.1", 1013 &elf_linux_sysvec, 1014 NULL, 1015 }; 1016 1017 static Elf32_Brandinfo linux_glibc2brand = { 1018 ELFOSABI_LINUX, 1019 EM_386, 1020 "Linux", 1021 "/compat/linux", 1022 "/lib/ld-linux.so.2", 1023 &elf_linux_sysvec, 1024 NULL, 1025 }; 1026 1027 Elf32_Brandinfo *linux_brandlist[] = { 1028 &linux_brand, 1029 &linux_glibc2brand, 1030 NULL 1031 }; 1032 1033 static int 1034 linux_elf_modevent(module_t mod, int type, void *data) 1035 { 1036 Elf32_Brandinfo **brandinfo; 1037 int error; 1038 struct linux_ioctl_handler **lihp; 1039 1040 error = 0; 1041 1042 switch(type) { 1043 case MOD_LOAD: 1044 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1045 ++brandinfo) 1046 if (elf32_insert_brand_entry(*brandinfo) < 0) 1047 error = EINVAL; 1048 if (error == 0) { 1049 SET_FOREACH(lihp, linux_ioctl_handler_set) 1050 linux_ioctl_register_handler(*lihp); 1051 if (bootverbose) 1052 printf("Linux ELF exec handler installed\n"); 1053 } else 1054 printf("cannot insert Linux ELF brand handler\n"); 1055 break; 1056 case MOD_UNLOAD: 1057 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1058 ++brandinfo) 1059 if (elf32_brand_inuse(*brandinfo)) 1060 error = EBUSY; 1061 if (error == 0) { 1062 for (brandinfo = &linux_brandlist[0]; 1063 *brandinfo != NULL; ++brandinfo) 1064 if (elf32_remove_brand_entry(*brandinfo) < 0) 1065 error = EINVAL; 1066 } 1067 if (error == 0) { 1068 SET_FOREACH(lihp, linux_ioctl_handler_set) 1069 linux_ioctl_unregister_handler(*lihp); 1070 if (bootverbose) 1071 printf("Linux ELF exec handler removed\n"); 1072 linux_mib_destroy(); 1073 } else 1074 printf("Could not deinstall ELF interpreter entry\n"); 1075 break; 1076 default: 1077 break; 1078 } 1079 return error; 1080 } 1081 1082 static moduledata_t linux_elf_mod = { 1083 "linuxelf", 1084 linux_elf_modevent, 1085 0 1086 }; 1087 1088 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1089