1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 /* XXX we use functions that might not exist. */ 37 #include "opt_compat.h" 38 39 #ifndef COMPAT_43 40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
41 #endif 42 #ifndef COMPAT_IA32 43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!" 44 #endif 45 46 #define __ELF_WORD_SIZE 32 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/exec.h> 51 #include <sys/imgact.h> 52 #include <sys/imgact_elf.h> 53 #include <sys/kernel.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/module.h> 57 #include <sys/mutex.h> 58 #include <sys/proc.h> 59 #include <sys/resourcevar.h> 60 #include <sys/signalvar.h> 61 #include <sys/sysctl.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysent.h> 64 #include <sys/sysproto.h> 65 #include <sys/vnode.h> 66 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vm_map.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_param.h> 74 75 #include <machine/cpu.h> 76 #include <machine/md_var.h> 77 #include <machine/pcb.h> 78 #include <machine/specialreg.h> 79 80 #include <amd64/linux32/linux.h> 81 #include <amd64/linux32/linux32_proto.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 89 90 #define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96 #if BYTE_ORDER == LITTLE_ENDIAN 97 #define SHELLMAGIC 0x2123 /* #! */ 98 #else 99 #define SHELLMAGIC 0x2321 100 #endif 101 102 /* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 extern char linux_sigcode[]; 112 extern int linux_szsigcode; 113 114 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 115 116 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 117 118 static int elf_linux_fixup(register_t **stack_base, 119 struct image_params *iparams); 120 static register_t *linux_copyout_strings(struct image_params *imgp); 121 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 122 caddr_t *params); 123 static void linux_sendsig(sig_t catcher, int sig, sigset_t *mask, 124 u_long code); 125 static void exec_linux_setregs(struct thread *td, u_long entry, 126 u_long stack, u_long ps_strings); 127 static void linux32_fixlimits(struct image_params *imgp); 128 129 /* 130 * Linux syscalls return negative errno's, we do positive and map them 131 */ 132 static int bsd_to_linux_errno[ELAST + 1] = { 133 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 134 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 135 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 136 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 137 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 138 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 139 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 140 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 141 -6, -6, -43, -42, -75, -6, -84 142 }; 143 144 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 145 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 146 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 147 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 148 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 149 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 150 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 151 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 152 0, LINUX_SIGUSR1, LINUX_SIGUSR2 153 }; 154 155 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 
156 SIGHUP, SIGINT, SIGQUIT, SIGILL, 157 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 158 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 159 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 160 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 161 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 162 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 163 SIGIO, SIGURG, SIGSYS 164 }; 165 166 #define LINUX_T_UNKNOWN 255 167 static int _bsd_to_linux_trapcode[] = { 168 LINUX_T_UNKNOWN, /* 0 */ 169 6, /* 1 T_PRIVINFLT */ 170 LINUX_T_UNKNOWN, /* 2 */ 171 3, /* 3 T_BPTFLT */ 172 LINUX_T_UNKNOWN, /* 4 */ 173 LINUX_T_UNKNOWN, /* 5 */ 174 16, /* 6 T_ARITHTRAP */ 175 254, /* 7 T_ASTFLT */ 176 LINUX_T_UNKNOWN, /* 8 */ 177 13, /* 9 T_PROTFLT */ 178 1, /* 10 T_TRCTRAP */ 179 LINUX_T_UNKNOWN, /* 11 */ 180 14, /* 12 T_PAGEFLT */ 181 LINUX_T_UNKNOWN, /* 13 */ 182 17, /* 14 T_ALIGNFLT */ 183 LINUX_T_UNKNOWN, /* 15 */ 184 LINUX_T_UNKNOWN, /* 16 */ 185 LINUX_T_UNKNOWN, /* 17 */ 186 0, /* 18 T_DIVIDE */ 187 2, /* 19 T_NMI */ 188 4, /* 20 T_OFLOW */ 189 5, /* 21 T_BOUND */ 190 7, /* 22 T_DNA */ 191 8, /* 23 T_DOUBLEFLT */ 192 9, /* 24 T_FPOPFLT */ 193 10, /* 25 T_TSSFLT */ 194 11, /* 26 T_SEGNPFLT */ 195 12, /* 27 T_STKFLT */ 196 18, /* 28 T_MCHK */ 197 19, /* 29 T_XMMFLT */ 198 15 /* 30 T_RESERVED */ 199 }; 200 #define bsd_to_linux_trapcode(code) \ 201 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 202 _bsd_to_linux_trapcode[(code)]: \ 203 LINUX_T_UNKNOWN) 204 205 struct linux32_ps_strings { 206 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 207 u_int ps_nargvstr; /* the number of argument strings */ 208 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 209 u_int ps_nenvstr; /* the number of environment strings */ 210 }; 211 212 /* 213 * If FreeBSD & Linux have a difference of opinion about what a trap 214 * means, deal with it here. 
215 * 216 * MPSAFE 217 */ 218 static int 219 translate_traps(int signal, int trap_code) 220 { 221 if (signal != SIGBUS) 222 return signal; 223 switch (trap_code) { 224 case T_PROTFLT: 225 case T_TSSFLT: 226 case T_DOUBLEFLT: 227 case T_PAGEFLT: 228 return SIGSEGV; 229 default: 230 return signal; 231 } 232 } 233 234 static int 235 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 236 { 237 Elf32_Auxargs *args; 238 Elf32_Addr *base; 239 Elf32_Addr *pos; 240 241 KASSERT(curthread->td_proc == imgp->proc && 242 (curthread->td_proc->p_flag & P_SA) == 0, 243 ("unsafe elf_linux_fixup(), should be curproc")); 244 base = (Elf32_Addr *)*stack_base; 245 args = (Elf32_Auxargs *)imgp->auxargs; 246 pos = base + (imgp->args->argc + imgp->args->envc + 2); 247 248 if (args->trace) 249 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1); 250 if (args->execfd != -1) 251 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 252 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 253 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 254 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 255 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 256 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 257 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 258 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 259 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 260 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 261 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 262 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 263 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 264 265 free(imgp->auxargs, M_TEMP); 266 imgp->auxargs = NULL; 267 268 base--; 269 suword32(base, (uint32_t)imgp->args->argc); 270 *stack_base = (register_t *)base; 271 return 0; 272 } 273 274 extern int _ucodesel, _ucode32sel, _udatasel; 275 extern unsigned long linux_sznonrtsigcode; 276 277 static void 278 linux_rt_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 279 { 280 struct thread *td = curthread; 281 struct proc 
*p = td->td_proc; 282 struct sigacts *psp; 283 struct trapframe *regs; 284 struct l_rt_sigframe *fp, frame; 285 int oonstack; 286 287 PROC_LOCK_ASSERT(p, MA_OWNED); 288 psp = p->p_sigacts; 289 mtx_assert(&psp->ps_mtx, MA_OWNED); 290 regs = td->td_frame; 291 oonstack = sigonstack(regs->tf_rsp); 292 293 #ifdef DEBUG 294 if (ldebug(rt_sendsig)) 295 printf(ARGS(rt_sendsig, "%p, %d, %p, %lu"), 296 catcher, sig, (void*)mask, code); 297 #endif 298 /* 299 * Allocate space for the signal handler context. 300 */ 301 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 302 SIGISMEMBER(psp->ps_sigonstack, sig)) { 303 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 304 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 305 } else 306 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 307 mtx_unlock(&psp->ps_mtx); 308 309 /* 310 * Build the argument list for the signal handler. 311 */ 312 if (p->p_sysent->sv_sigtbl) 313 if (sig <= p->p_sysent->sv_sigsize) 314 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 315 316 bzero(&frame, sizeof(frame)); 317 318 frame.sf_handler = PTROUT(catcher); 319 frame.sf_sig = sig; 320 frame.sf_siginfo = PTROUT(&fp->sf_si); 321 frame.sf_ucontext = PTROUT(&fp->sf_sc); 322 323 /* Fill in POSIX parts */ 324 frame.sf_si.lsi_signo = sig; 325 frame.sf_si.lsi_code = code; 326 frame.sf_si.lsi_addr = PTROUT(regs->tf_err); 327 328 /* 329 * Build the signal context to be used by sigreturn. 330 */ 331 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 332 frame.sf_sc.uc_link = 0; /* XXX ??? */ 333 334 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 335 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 336 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 337 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 338 PROC_UNLOCK(p); 339 340 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 341 342 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 343 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 344 frame.sf_sc.uc_mcontext.sc_fs = rfs(); 345 __asm __volatile("movl %%es,%0" : 346 "=rm" (frame.sf_sc.uc_mcontext.sc_es)); 347 __asm __volatile("movl %%ds,%0" : 348 "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 349 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 350 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 351 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 352 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 353 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 354 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 355 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 356 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 357 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 358 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 359 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 360 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 361 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 362 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 363 364 #ifdef DEBUG 365 if (ldebug(rt_sendsig)) 366 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 367 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 368 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 369 #endif 370 371 if (copyout(&frame, fp, sizeof(frame)) != 0) { 372 /* 373 * Process has trashed its stack; give it an illegal 374 * instruction to halt it in its tracks. 375 */ 376 #ifdef DEBUG 377 if (ldebug(rt_sendsig)) 378 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 379 fp, oonstack); 380 #endif 381 PROC_LOCK(p); 382 sigexit(td, SIGILL); 383 } 384 385 /* 386 * Build context to run handler in. 
387 */ 388 regs->tf_rsp = PTROUT(fp); 389 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 390 linux_sznonrtsigcode; 391 regs->tf_rflags &= ~PSL_T; 392 regs->tf_cs = _ucode32sel; 393 regs->tf_ss = _udatasel; 394 load_ds(_udatasel); 395 td->td_pcb->pcb_ds = _udatasel; 396 load_es(_udatasel); 397 td->td_pcb->pcb_es = _udatasel; 398 PROC_LOCK(p); 399 mtx_lock(&psp->ps_mtx); 400 } 401 402 403 /* 404 * Send an interrupt to process. 405 * 406 * Stack is set up to allow sigcode stored 407 * in u. to call routine, followed by kcall 408 * to sigreturn routine below. After sigreturn 409 * resets the signal mask, the stack, and the 410 * frame pointer, it returns to the user 411 * specified pc, psl. 412 */ 413 static void 414 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code) 415 { 416 struct thread *td = curthread; 417 struct proc *p = td->td_proc; 418 struct sigacts *psp; 419 struct trapframe *regs; 420 struct l_sigframe *fp, frame; 421 l_sigset_t lmask; 422 int oonstack, i; 423 424 PROC_LOCK_ASSERT(p, MA_OWNED); 425 psp = p->p_sigacts; 426 mtx_assert(&psp->ps_mtx, MA_OWNED); 427 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 428 /* Signal handler installed with SA_SIGINFO. */ 429 linux_rt_sendsig(catcher, sig, mask, code); 430 return; 431 } 432 433 regs = td->td_frame; 434 oonstack = sigonstack(regs->tf_rsp); 435 436 #ifdef DEBUG 437 if (ldebug(sendsig)) 438 printf(ARGS(sendsig, "%p, %d, %p, %lu"), 439 catcher, sig, (void*)mask, code); 440 #endif 441 442 /* 443 * Allocate space for the signal handler context. 444 */ 445 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 446 SIGISMEMBER(psp->ps_sigonstack, sig)) { 447 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 448 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 449 } else 450 fp = (struct l_sigframe *)regs->tf_rsp - 1; 451 mtx_unlock(&psp->ps_mtx); 452 PROC_UNLOCK(p); 453 454 /* 455 * Build the argument list for the signal handler. 
456 */ 457 if (p->p_sysent->sv_sigtbl) 458 if (sig <= p->p_sysent->sv_sigsize) 459 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 460 461 bzero(&frame, sizeof(frame)); 462 463 frame.sf_handler = PTROUT(catcher); 464 frame.sf_sig = sig; 465 466 bsd_to_linux_sigset(mask, &lmask); 467 468 /* 469 * Build the signal context to be used by sigreturn. 470 */ 471 frame.sf_sc.sc_mask = lmask.__bits[0]; 472 frame.sf_sc.sc_gs = rgs(); 473 frame.sf_sc.sc_fs = rfs(); 474 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); 475 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); 476 frame.sf_sc.sc_edi = regs->tf_rdi; 477 frame.sf_sc.sc_esi = regs->tf_rsi; 478 frame.sf_sc.sc_ebp = regs->tf_rbp; 479 frame.sf_sc.sc_ebx = regs->tf_rbx; 480 frame.sf_sc.sc_edx = regs->tf_rdx; 481 frame.sf_sc.sc_ecx = regs->tf_rcx; 482 frame.sf_sc.sc_eax = regs->tf_rax; 483 frame.sf_sc.sc_eip = regs->tf_rip; 484 frame.sf_sc.sc_cs = regs->tf_cs; 485 frame.sf_sc.sc_eflags = regs->tf_rflags; 486 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 487 frame.sf_sc.sc_ss = regs->tf_ss; 488 frame.sf_sc.sc_err = regs->tf_err; 489 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 490 491 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 492 frame.sf_extramask[i] = lmask.__bits[i+1]; 493 494 if (copyout(&frame, fp, sizeof(frame)) != 0) { 495 /* 496 * Process has trashed its stack; give it an illegal 497 * instruction to halt it in its tracks. 498 */ 499 PROC_LOCK(p); 500 sigexit(td, SIGILL); 501 } 502 503 /* 504 * Build context to run handler in. 505 */ 506 regs->tf_rsp = PTROUT(fp); 507 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 508 regs->tf_rflags &= ~PSL_T; 509 regs->tf_cs = _ucode32sel; 510 regs->tf_ss = _udatasel; 511 load_ds(_udatasel); 512 td->td_pcb->pcb_ds = _udatasel; 513 load_es(_udatasel); 514 td->td_pcb->pcb_es = _udatasel; 515 PROC_LOCK(p); 516 mtx_lock(&psp->ps_mtx); 517 } 518 519 /* 520 * System call to cleanup state after a signal 521 * has been taken. 
Reset signal mask and 522 * stack state from context left by sendsig (above). 523 * Return to previous pc and psl as specified by 524 * context left by sendsig. Check carefully to 525 * make sure that the user has not modified the 526 * psl to gain improper privileges or to cause 527 * a machine fault. 528 */ 529 int 530 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 531 { 532 struct proc *p = td->td_proc; 533 struct l_sigframe frame; 534 struct trapframe *regs; 535 l_sigset_t lmask; 536 int eflags, i; 537 538 regs = td->td_frame; 539 540 #ifdef DEBUG 541 if (ldebug(sigreturn)) 542 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 543 #endif 544 /* 545 * The trampoline code hands us the sigframe. 546 * It is unsafe to keep track of it ourselves, in the event that a 547 * program jumps out of a signal handler. 548 */ 549 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 550 return (EFAULT); 551 552 /* 553 * Check for security violations. 554 */ 555 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 556 eflags = frame.sf_sc.sc_eflags; 557 /* 558 * XXX do allow users to change the privileged flag PSL_RF. The 559 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 560 * sometimes set it there too. tf_eflags is kept in the signal 561 * context during signal handling and there is no other place 562 * to remember it, so the PSL_RF bit may be corrupted by the 563 * signal handler without us knowing. Corruption of the PSL_RF 564 * bit at worst causes one more or one less debugger trap, so 565 * allowing it is fairly harmless. 566 */ 567 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 568 return(EINVAL); 569 570 /* 571 * Don't allow users to load a valid privileged %cs. Let the 572 * hardware check for invalid selectors, excess privilege in 573 * other selectors, invalid %eip's and invalid %esp's. 
574 */ 575 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 576 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 577 trapsignal(td, SIGBUS, T_PROTFLT); 578 return(EINVAL); 579 } 580 581 lmask.__bits[0] = frame.sf_sc.sc_mask; 582 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 583 lmask.__bits[i+1] = frame.sf_extramask[i]; 584 PROC_LOCK(p); 585 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 586 SIG_CANTMASK(td->td_sigmask); 587 signotify(td); 588 PROC_UNLOCK(p); 589 590 /* 591 * Restore signal context. 592 */ 593 /* Selectors were restored by the trampoline. */ 594 regs->tf_rdi = frame.sf_sc.sc_edi; 595 regs->tf_rsi = frame.sf_sc.sc_esi; 596 regs->tf_rbp = frame.sf_sc.sc_ebp; 597 regs->tf_rbx = frame.sf_sc.sc_ebx; 598 regs->tf_rdx = frame.sf_sc.sc_edx; 599 regs->tf_rcx = frame.sf_sc.sc_ecx; 600 regs->tf_rax = frame.sf_sc.sc_eax; 601 regs->tf_rip = frame.sf_sc.sc_eip; 602 regs->tf_cs = frame.sf_sc.sc_cs; 603 regs->tf_rflags = eflags; 604 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 605 regs->tf_ss = frame.sf_sc.sc_ss; 606 607 return (EJUSTRETURN); 608 } 609 610 /* 611 * System call to cleanup state after a signal 612 * has been taken. Reset signal mask and 613 * stack state from context left by rt_sendsig (above). 614 * Return to previous pc and psl as specified by 615 * context left by sendsig. Check carefully to 616 * make sure that the user has not modified the 617 * psl to gain improper privileges or to cause 618 * a machine fault. 619 */ 620 int 621 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 622 { 623 struct proc *p = td->td_proc; 624 struct l_ucontext uc; 625 struct l_sigcontext *context; 626 l_stack_t *lss; 627 stack_t ss; 628 struct trapframe *regs; 629 int eflags; 630 631 regs = td->td_frame; 632 633 #ifdef DEBUG 634 if (ldebug(rt_sigreturn)) 635 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 636 #endif 637 /* 638 * The trampoline code hands us the ucontext. 
639 * It is unsafe to keep track of it ourselves, in the event that a 640 * program jumps out of a signal handler. 641 */ 642 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 643 return (EFAULT); 644 645 context = &uc.uc_mcontext; 646 647 /* 648 * Check for security violations. 649 */ 650 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 651 eflags = context->sc_eflags; 652 /* 653 * XXX do allow users to change the privileged flag PSL_RF. The 654 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 655 * sometimes set it there too. tf_eflags is kept in the signal 656 * context during signal handling and there is no other place 657 * to remember it, so the PSL_RF bit may be corrupted by the 658 * signal handler without us knowing. Corruption of the PSL_RF 659 * bit at worst causes one more or one less debugger trap, so 660 * allowing it is fairly harmless. 661 */ 662 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 663 return(EINVAL); 664 665 /* 666 * Don't allow users to load a valid privileged %cs. Let the 667 * hardware check for invalid selectors, excess privilege in 668 * other selectors, invalid %eip's and invalid %esp's. 669 */ 670 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 671 if (!CS_SECURE(context->sc_cs)) { 672 trapsignal(td, SIGBUS, T_PROTFLT); 673 return(EINVAL); 674 } 675 676 PROC_LOCK(p); 677 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 678 SIG_CANTMASK(td->td_sigmask); 679 signotify(td); 680 PROC_UNLOCK(p); 681 682 /* 683 * Restore signal context 684 */ 685 /* Selectors were restored by the trampoline. 
*/ 686 regs->tf_rdi = context->sc_edi; 687 regs->tf_rsi = context->sc_esi; 688 regs->tf_rbp = context->sc_ebp; 689 regs->tf_rbx = context->sc_ebx; 690 regs->tf_rdx = context->sc_edx; 691 regs->tf_rcx = context->sc_ecx; 692 regs->tf_rax = context->sc_eax; 693 regs->tf_rip = context->sc_eip; 694 regs->tf_cs = context->sc_cs; 695 regs->tf_rflags = eflags; 696 regs->tf_rsp = context->sc_esp_at_signal; 697 regs->tf_ss = context->sc_ss; 698 699 /* 700 * call sigaltstack & ignore results.. 701 */ 702 lss = &uc.uc_stack; 703 ss.ss_sp = PTRIN(lss->ss_sp); 704 ss.ss_size = lss->ss_size; 705 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 706 707 #ifdef DEBUG 708 if (ldebug(rt_sigreturn)) 709 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 710 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 711 #endif 712 (void)kern_sigaltstack(td, &ss, NULL); 713 714 return (EJUSTRETURN); 715 } 716 717 /* 718 * MPSAFE 719 */ 720 static void 721 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 722 { 723 args[0] = tf->tf_rbx; 724 args[1] = tf->tf_rcx; 725 args[2] = tf->tf_rdx; 726 args[3] = tf->tf_rsi; 727 args[4] = tf->tf_rdi; 728 args[5] = tf->tf_rbp; /* Unconfirmed */ 729 *params = NULL; /* no copyin */ 730 } 731 732 /* 733 * If a linux binary is exec'ing something, try this image activator 734 * first. We override standard shell script execution in order to 735 * be able to modify the interpreter path. We only do this if a linux 736 * binary is doing the exec, so we do not create an EXEC module for it. 
737 */ 738 static int exec_linux_imgact_try(struct image_params *iparams); 739 740 static int 741 exec_linux_imgact_try(struct image_params *imgp) 742 { 743 const char *head = (const char *)imgp->image_header; 744 char *rpath; 745 int error = -1, len; 746 747 /* 748 * The interpreter for shell scripts run from a linux binary needs 749 * to be located in /compat/linux if possible in order to recursively 750 * maintain linux path emulation. 751 */ 752 if (((const short *)head)[0] == SHELLMAGIC) { 753 /* 754 * Run our normal shell image activator. If it succeeds attempt 755 * to use the alternate path for the interpreter. If an alternate 756 * path is found, use our stringspace to store it. 757 */ 758 if ((error = exec_shell_imgact(imgp)) == 0) { 759 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 760 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 761 if (rpath != NULL) { 762 len = strlen(rpath) + 1; 763 764 if (len <= MAXSHELLCMDLEN) { 765 memcpy(imgp->interpreter_name, rpath, len); 766 } 767 free(rpath, M_TEMP); 768 } 769 } 770 } 771 return(error); 772 } 773 774 /* 775 * Clear registers on exec 776 * XXX copied from ia32_signal.c. 
777 */ 778 static void 779 exec_linux_setregs(td, entry, stack, ps_strings) 780 struct thread *td; 781 u_long entry; 782 u_long stack; 783 u_long ps_strings; 784 { 785 struct trapframe *regs = td->td_frame; 786 struct pcb *pcb = td->td_pcb; 787 788 wrmsr(MSR_FSBASE, 0); 789 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 790 pcb->pcb_fsbase = 0; 791 pcb->pcb_gsbase = 0; 792 load_ds(_udatasel); 793 load_es(_udatasel); 794 load_fs(_udatasel); 795 load_gs(0); 796 pcb->pcb_ds = _udatasel; 797 pcb->pcb_es = _udatasel; 798 pcb->pcb_fs = _udatasel; 799 pcb->pcb_gs = 0; 800 801 bzero((char *)regs, sizeof(struct trapframe)); 802 regs->tf_rip = entry; 803 regs->tf_rsp = stack; 804 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 805 regs->tf_ss = _udatasel; 806 regs->tf_cs = _ucode32sel; 807 regs->tf_rbx = ps_strings; 808 load_cr0(rcr0() | CR0_MP | CR0_TS); 809 810 /* Return via doreti so that we can change to a different %cs */ 811 pcb->pcb_flags |= PCB_FULLCTX; 812 td->td_retval[1] = 0; 813 } 814 815 /* 816 * XXX copied from ia32_sysvec.c. 817 */ 818 static register_t * 819 linux_copyout_strings(struct image_params *imgp) 820 { 821 int argc, envc; 822 u_int32_t *vectp; 823 char *stringp, *destp; 824 u_int32_t *stack_base; 825 struct linux32_ps_strings *arginfo; 826 int sigcodesz; 827 828 /* 829 * Calculate string base and vector table pointers. 830 * Also deal with signal trampoline code for this exec type. 831 */ 832 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 833 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode); 834 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE - 835 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 836 837 /* 838 * install sigcode 839 */ 840 if (sigcodesz) 841 copyout(imgp->proc->p_sysent->sv_sigcode, 842 ((caddr_t)arginfo - sigcodesz), szsigcode); 843 844 /* 845 * If we have a valid auxargs ptr, prepare some room 846 * on the stack. 
847 */ 848 if (imgp->auxargs) { 849 /* 850 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 851 * lower compatibility. 852 */ 853 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 854 : (AT_COUNT * 2); 855 /* 856 * The '+ 2' is for the null pointers at the end of each of 857 * the arg and env vector sets,and imgp->auxarg_size is room 858 * for argument of Runtime loader. 859 */ 860 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + 861 imgp->auxarg_size) * sizeof(u_int32_t)); 862 863 } else 864 /* 865 * The '+ 2' is for the null pointers at the end of each of 866 * the arg and env vector sets 867 */ 868 vectp = (u_int32_t *) 869 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); 870 871 /* 872 * vectp also becomes our initial stack base 873 */ 874 stack_base = vectp; 875 876 stringp = imgp->args->begin_argv; 877 argc = imgp->args->argc; 878 envc = imgp->args->envc; 879 /* 880 * Copy out strings - arguments and environment. 881 */ 882 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 883 884 /* 885 * Fill in "ps_strings" struct for ps, w, etc. 886 */ 887 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); 888 suword32(&arginfo->ps_nargvstr, argc); 889 890 /* 891 * Fill in argument portion of vector table. 892 */ 893 for (; argc > 0; --argc) { 894 suword32(vectp++, (u_int32_t)(intptr_t)destp); 895 while (*stringp++ != 0) 896 destp++; 897 destp++; 898 } 899 900 /* a null vector table pointer separates the argp's from the envp's */ 901 suword32(vectp++, 0); 902 903 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); 904 suword32(&arginfo->ps_nenvstr, envc); 905 906 /* 907 * Fill in environment portion of vector table. 
908 */ 909 for (; envc > 0; --envc) { 910 suword32(vectp++, (u_int32_t)(intptr_t)destp); 911 while (*stringp++ != 0) 912 destp++; 913 destp++; 914 } 915 916 /* end of vector table is a null pointer */ 917 suword32(vectp, 0); 918 919 return ((register_t *)stack_base); 920 } 921 922 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 923 "32-bit Linux emulation"); 924 925 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 926 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 927 &linux32_maxdsiz, 0, ""); 928 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 929 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 930 &linux32_maxssiz, 0, ""); 931 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 932 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 933 &linux32_maxvmem, 0, ""); 934 935 /* 936 * XXX copied from ia32_sysvec.c. 937 */ 938 static void 939 linux32_fixlimits(struct image_params *imgp) 940 { 941 struct proc *p = imgp->proc; 942 struct plimit *oldlim, *newlim; 943 944 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && 945 linux32_maxvmem == 0) 946 return; 947 newlim = lim_alloc(); 948 PROC_LOCK(p); 949 oldlim = p->p_limit; 950 lim_copy(newlim, oldlim); 951 if (linux32_maxdsiz != 0) { 952 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) 953 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; 954 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) 955 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; 956 } 957 if (linux32_maxssiz != 0) { 958 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) 959 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; 960 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) 961 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; 962 } 963 if (linux32_maxvmem != 0) { 964 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) 965 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; 966 if 
(newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) 967 newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; 968 } 969 p->p_limit = newlim; 970 PROC_UNLOCK(p); 971 lim_free(oldlim); 972 } 973 974 struct sysentvec elf_linux_sysvec = { 975 LINUX_SYS_MAXSYSCALL, 976 linux_sysent, 977 0xff, 978 LINUX_SIGTBLSZ, 979 bsd_to_linux_signal, 980 ELAST + 1, 981 bsd_to_linux_errno, 982 translate_traps, 983 elf_linux_fixup, 984 linux_sendsig, 985 linux_sigcode, 986 &linux_szsigcode, 987 linux_prepsyscall, 988 "Linux ELF32", 989 elf32_coredump, 990 exec_linux_imgact_try, 991 LINUX_MINSIGSTKSZ, 992 PAGE_SIZE, 993 VM_MIN_ADDRESS, 994 LINUX32_USRSTACK, 995 LINUX32_USRSTACK, 996 LINUX32_PS_STRINGS, 997 VM_PROT_ALL, 998 linux_copyout_strings, 999 exec_linux_setregs, 1000 linux32_fixlimits 1001 }; 1002 1003 static Elf32_Brandinfo linux_brand = { 1004 ELFOSABI_LINUX, 1005 EM_386, 1006 "Linux", 1007 "/compat/linux", 1008 "/lib/ld-linux.so.1", 1009 &elf_linux_sysvec, 1010 NULL, 1011 }; 1012 1013 static Elf32_Brandinfo linux_glibc2brand = { 1014 ELFOSABI_LINUX, 1015 EM_386, 1016 "Linux", 1017 "/compat/linux", 1018 "/lib/ld-linux.so.2", 1019 &elf_linux_sysvec, 1020 NULL, 1021 }; 1022 1023 Elf32_Brandinfo *linux_brandlist[] = { 1024 &linux_brand, 1025 &linux_glibc2brand, 1026 NULL 1027 }; 1028 1029 static int 1030 linux_elf_modevent(module_t mod, int type, void *data) 1031 { 1032 Elf32_Brandinfo **brandinfo; 1033 int error; 1034 struct linux_ioctl_handler **lihp; 1035 1036 error = 0; 1037 1038 switch(type) { 1039 case MOD_LOAD: 1040 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1041 ++brandinfo) 1042 if (elf32_insert_brand_entry(*brandinfo) < 0) 1043 error = EINVAL; 1044 if (error == 0) { 1045 SET_FOREACH(lihp, linux_ioctl_handler_set) 1046 linux_ioctl_register_handler(*lihp); 1047 if (bootverbose) 1048 printf("Linux ELF exec handler installed\n"); 1049 } else 1050 printf("cannot insert Linux ELF brand handler\n"); 1051 break; 1052 case MOD_UNLOAD: 1053 for 
(brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1054 ++brandinfo) 1055 if (elf32_brand_inuse(*brandinfo)) 1056 error = EBUSY; 1057 if (error == 0) { 1058 for (brandinfo = &linux_brandlist[0]; 1059 *brandinfo != NULL; ++brandinfo) 1060 if (elf32_remove_brand_entry(*brandinfo) < 0) 1061 error = EINVAL; 1062 } 1063 if (error == 0) { 1064 SET_FOREACH(lihp, linux_ioctl_handler_set) 1065 linux_ioctl_unregister_handler(*lihp); 1066 if (bootverbose) 1067 printf("Linux ELF exec handler removed\n"); 1068 linux_mib_destroy(); 1069 } else 1070 printf("Could not deinstall ELF interpreter entry\n"); 1071 break; 1072 default: 1073 break; 1074 } 1075 return error; 1076 } 1077 1078 static moduledata_t linux_elf_mod = { 1079 "linuxelf", 1080 linux_elf_modevent, 1081 0 1082 }; 1083 1084 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1085