1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 /* XXX we use functions that might not exist. */ 37 #include "opt_compat.h" 38 39 #ifndef COMPAT_43 40 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!" 
41 #endif 42 #ifndef COMPAT_IA32 43 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!" 44 #endif 45 46 #define __ELF_WORD_SIZE 32 47 48 #include <sys/param.h> 49 #include <sys/systm.h> 50 #include <sys/exec.h> 51 #include <sys/imgact.h> 52 #include <sys/imgact_elf.h> 53 #include <sys/kernel.h> 54 #include <sys/lock.h> 55 #include <sys/malloc.h> 56 #include <sys/module.h> 57 #include <sys/mutex.h> 58 #include <sys/proc.h> 59 #include <sys/resourcevar.h> 60 #include <sys/signalvar.h> 61 #include <sys/sysctl.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysent.h> 64 #include <sys/sysproto.h> 65 #include <sys/vnode.h> 66 67 #include <vm/vm.h> 68 #include <vm/pmap.h> 69 #include <vm/vm_extern.h> 70 #include <vm/vm_map.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_page.h> 73 #include <vm/vm_param.h> 74 75 #include <machine/cpu.h> 76 #include <machine/md_var.h> 77 #include <machine/pcb.h> 78 #include <machine/specialreg.h> 79 80 #include <amd64/linux32/linux.h> 81 #include <amd64/linux32/linux32_proto.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 89 90 #define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96 #if BYTE_ORDER == LITTLE_ENDIAN 97 #define SHELLMAGIC 0x2123 /* #! */ 98 #else 99 #define SHELLMAGIC 0x2321 100 #endif 101 102 /* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 extern char linux_sigcode[]; 112 extern int linux_szsigcode; 113 114 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 115 116 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 117 118 static int elf_linux_fixup(register_t **stack_base, 119 struct image_params *iparams); 120 static register_t *linux_copyout_strings(struct image_params *imgp); 121 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 122 caddr_t *params); 123 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 124 static void exec_linux_setregs(struct thread *td, u_long entry, 125 u_long stack, u_long ps_strings); 126 static void linux32_fixlimits(struct proc *p); 127 128 /* 129 * Linux syscalls return negative errno's, we do positive and map them 130 */ 131 static int bsd_to_linux_errno[ELAST + 1] = { 132 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 133 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 134 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 135 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 136 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 137 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 138 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 139 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 140 -6, -6, -43, -42, -75, -6, -84 141 }; 142 143 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 144 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 145 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 146 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 147 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 148 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 149 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 150 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 151 0, LINUX_SIGUSR1, LINUX_SIGUSR2 152 }; 153 154 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 155 SIGHUP, SIGINT, 
SIGQUIT, SIGILL, 156 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 157 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 158 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 159 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 160 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 161 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 162 SIGIO, SIGURG, SIGSYS 163 }; 164 165 #define LINUX_T_UNKNOWN 255 166 static int _bsd_to_linux_trapcode[] = { 167 LINUX_T_UNKNOWN, /* 0 */ 168 6, /* 1 T_PRIVINFLT */ 169 LINUX_T_UNKNOWN, /* 2 */ 170 3, /* 3 T_BPTFLT */ 171 LINUX_T_UNKNOWN, /* 4 */ 172 LINUX_T_UNKNOWN, /* 5 */ 173 16, /* 6 T_ARITHTRAP */ 174 254, /* 7 T_ASTFLT */ 175 LINUX_T_UNKNOWN, /* 8 */ 176 13, /* 9 T_PROTFLT */ 177 1, /* 10 T_TRCTRAP */ 178 LINUX_T_UNKNOWN, /* 11 */ 179 14, /* 12 T_PAGEFLT */ 180 LINUX_T_UNKNOWN, /* 13 */ 181 17, /* 14 T_ALIGNFLT */ 182 LINUX_T_UNKNOWN, /* 15 */ 183 LINUX_T_UNKNOWN, /* 16 */ 184 LINUX_T_UNKNOWN, /* 17 */ 185 0, /* 18 T_DIVIDE */ 186 2, /* 19 T_NMI */ 187 4, /* 20 T_OFLOW */ 188 5, /* 21 T_BOUND */ 189 7, /* 22 T_DNA */ 190 8, /* 23 T_DOUBLEFLT */ 191 9, /* 24 T_FPOPFLT */ 192 10, /* 25 T_TSSFLT */ 193 11, /* 26 T_SEGNPFLT */ 194 12, /* 27 T_STKFLT */ 195 18, /* 28 T_MCHK */ 196 19, /* 29 T_XMMFLT */ 197 15 /* 30 T_RESERVED */ 198 }; 199 #define bsd_to_linux_trapcode(code) \ 200 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 201 _bsd_to_linux_trapcode[(code)]: \ 202 LINUX_T_UNKNOWN) 203 204 struct linux32_ps_strings { 205 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 206 u_int ps_nargvstr; /* the number of argument strings */ 207 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 208 u_int ps_nenvstr; /* the number of environment strings */ 209 }; 210 211 /* 212 * If FreeBSD & Linux have a difference of opinion about what a trap 213 * means, deal with it here. 
214 * 215 * MPSAFE 216 */ 217 static int 218 translate_traps(int signal, int trap_code) 219 { 220 if (signal != SIGBUS) 221 return signal; 222 switch (trap_code) { 223 case T_PROTFLT: 224 case T_TSSFLT: 225 case T_DOUBLEFLT: 226 case T_PAGEFLT: 227 return SIGSEGV; 228 default: 229 return signal; 230 } 231 } 232 233 static int 234 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 235 { 236 Elf32_Auxargs *args; 237 Elf32_Addr *base; 238 Elf32_Addr *pos; 239 240 KASSERT(curthread->td_proc == imgp->proc && 241 (curthread->td_proc->p_flag & P_SA) == 0, 242 ("unsafe elf_linux_fixup(), should be curproc")); 243 base = (Elf32_Addr *)*stack_base; 244 args = (Elf32_Auxargs *)imgp->auxargs; 245 pos = base + (imgp->args->argc + imgp->args->envc + 2); 246 247 if (args->trace) 248 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1); 249 if (args->execfd != -1) 250 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 251 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 252 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 253 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 254 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 255 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 256 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 257 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 258 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 259 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 260 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 261 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 262 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 263 264 free(imgp->auxargs, M_TEMP); 265 imgp->auxargs = NULL; 266 267 base--; 268 suword32(base, (uint32_t)imgp->args->argc); 269 *stack_base = (register_t *)base; 270 return 0; 271 } 272 273 extern int _ucodesel, _ucode32sel, _udatasel; 274 extern unsigned long linux_sznonrtsigcode; 275 276 static void 277 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 278 { 279 struct thread *td = curthread; 280 struct proc *p = 
td->td_proc; 281 struct sigacts *psp; 282 struct trapframe *regs; 283 struct l_rt_sigframe *fp, frame; 284 int oonstack; 285 int sig; 286 int code; 287 288 sig = ksi->ksi_signo; 289 code = ksi->ksi_code; 290 PROC_LOCK_ASSERT(p, MA_OWNED); 291 psp = p->p_sigacts; 292 mtx_assert(&psp->ps_mtx, MA_OWNED); 293 regs = td->td_frame; 294 oonstack = sigonstack(regs->tf_rsp); 295 296 #ifdef DEBUG 297 if (ldebug(rt_sendsig)) 298 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 299 catcher, sig, (void*)mask, code); 300 #endif 301 /* 302 * Allocate space for the signal handler context. 303 */ 304 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 305 SIGISMEMBER(psp->ps_sigonstack, sig)) { 306 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 307 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 308 } else 309 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 310 mtx_unlock(&psp->ps_mtx); 311 312 /* 313 * Build the argument list for the signal handler. 314 */ 315 if (p->p_sysent->sv_sigtbl) 316 if (sig <= p->p_sysent->sv_sigsize) 317 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 318 319 bzero(&frame, sizeof(frame)); 320 321 frame.sf_handler = PTROUT(catcher); 322 frame.sf_sig = sig; 323 frame.sf_siginfo = PTROUT(&fp->sf_si); 324 frame.sf_ucontext = PTROUT(&fp->sf_sc); 325 326 /* Fill in POSIX parts */ 327 frame.sf_si.lsi_signo = sig; 328 frame.sf_si.lsi_code = code; 329 frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr); 330 331 /* 332 * Build the signal context to be used by sigreturn. 333 */ 334 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 335 frame.sf_sc.uc_link = 0; /* XXX ??? */ 336 337 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 338 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 339 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 340 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 341 PROC_UNLOCK(p); 342 343 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 344 345 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 346 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 347 frame.sf_sc.uc_mcontext.sc_fs = rfs(); 348 __asm __volatile("movl %%es,%0" : 349 "=rm" (frame.sf_sc.uc_mcontext.sc_es)); 350 __asm __volatile("movl %%ds,%0" : 351 "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 352 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 353 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 354 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 355 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 356 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 357 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 358 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 359 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 360 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 361 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 362 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 363 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 364 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 365 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 366 367 #ifdef DEBUG 368 if (ldebug(rt_sendsig)) 369 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 370 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 371 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 372 #endif 373 374 if (copyout(&frame, fp, sizeof(frame)) != 0) { 375 /* 376 * Process has trashed its stack; give it an illegal 377 * instruction to halt it in its tracks. 378 */ 379 #ifdef DEBUG 380 if (ldebug(rt_sendsig)) 381 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 382 fp, oonstack); 383 #endif 384 PROC_LOCK(p); 385 sigexit(td, SIGILL); 386 } 387 388 /* 389 * Build context to run handler in. 
390 */ 391 regs->tf_rsp = PTROUT(fp); 392 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 393 linux_sznonrtsigcode; 394 regs->tf_rflags &= ~PSL_T; 395 regs->tf_cs = _ucode32sel; 396 regs->tf_ss = _udatasel; 397 load_ds(_udatasel); 398 td->td_pcb->pcb_ds = _udatasel; 399 load_es(_udatasel); 400 td->td_pcb->pcb_es = _udatasel; 401 PROC_LOCK(p); 402 mtx_lock(&psp->ps_mtx); 403 } 404 405 406 /* 407 * Send an interrupt to process. 408 * 409 * Stack is set up to allow sigcode stored 410 * in u. to call routine, followed by kcall 411 * to sigreturn routine below. After sigreturn 412 * resets the signal mask, the stack, and the 413 * frame pointer, it returns to the user 414 * specified pc, psl. 415 */ 416 static void 417 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 418 { 419 struct thread *td = curthread; 420 struct proc *p = td->td_proc; 421 struct sigacts *psp; 422 struct trapframe *regs; 423 struct l_sigframe *fp, frame; 424 l_sigset_t lmask; 425 int oonstack, i; 426 int sig, code; 427 428 sig = ksi->ksi_signo; 429 code = ksi->ksi_code; 430 PROC_LOCK_ASSERT(p, MA_OWNED); 431 psp = p->p_sigacts; 432 mtx_assert(&psp->ps_mtx, MA_OWNED); 433 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 434 /* Signal handler installed with SA_SIGINFO. */ 435 linux_rt_sendsig(catcher, ksi, mask); 436 return; 437 } 438 439 regs = td->td_frame; 440 oonstack = sigonstack(regs->tf_rsp); 441 442 #ifdef DEBUG 443 if (ldebug(sendsig)) 444 printf(ARGS(sendsig, "%p, %d, %p, %u"), 445 catcher, sig, (void*)mask, code); 446 #endif 447 448 /* 449 * Allocate space for the signal handler context. 
450 */ 451 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 452 SIGISMEMBER(psp->ps_sigonstack, sig)) { 453 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 454 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 455 } else 456 fp = (struct l_sigframe *)regs->tf_rsp - 1; 457 mtx_unlock(&psp->ps_mtx); 458 PROC_UNLOCK(p); 459 460 /* 461 * Build the argument list for the signal handler. 462 */ 463 if (p->p_sysent->sv_sigtbl) 464 if (sig <= p->p_sysent->sv_sigsize) 465 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 466 467 bzero(&frame, sizeof(frame)); 468 469 frame.sf_handler = PTROUT(catcher); 470 frame.sf_sig = sig; 471 472 bsd_to_linux_sigset(mask, &lmask); 473 474 /* 475 * Build the signal context to be used by sigreturn. 476 */ 477 frame.sf_sc.sc_mask = lmask.__bits[0]; 478 frame.sf_sc.sc_gs = rgs(); 479 frame.sf_sc.sc_fs = rfs(); 480 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); 481 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); 482 frame.sf_sc.sc_edi = regs->tf_rdi; 483 frame.sf_sc.sc_esi = regs->tf_rsi; 484 frame.sf_sc.sc_ebp = regs->tf_rbp; 485 frame.sf_sc.sc_ebx = regs->tf_rbx; 486 frame.sf_sc.sc_edx = regs->tf_rdx; 487 frame.sf_sc.sc_ecx = regs->tf_rcx; 488 frame.sf_sc.sc_eax = regs->tf_rax; 489 frame.sf_sc.sc_eip = regs->tf_rip; 490 frame.sf_sc.sc_cs = regs->tf_cs; 491 frame.sf_sc.sc_eflags = regs->tf_rflags; 492 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 493 frame.sf_sc.sc_ss = regs->tf_ss; 494 frame.sf_sc.sc_err = regs->tf_err; 495 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 496 497 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 498 frame.sf_extramask[i] = lmask.__bits[i+1]; 499 500 if (copyout(&frame, fp, sizeof(frame)) != 0) { 501 /* 502 * Process has trashed its stack; give it an illegal 503 * instruction to halt it in its tracks. 504 */ 505 PROC_LOCK(p); 506 sigexit(td, SIGILL); 507 } 508 509 /* 510 * Build context to run handler in. 
511 */ 512 regs->tf_rsp = PTROUT(fp); 513 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 514 regs->tf_rflags &= ~PSL_T; 515 regs->tf_cs = _ucode32sel; 516 regs->tf_ss = _udatasel; 517 load_ds(_udatasel); 518 td->td_pcb->pcb_ds = _udatasel; 519 load_es(_udatasel); 520 td->td_pcb->pcb_es = _udatasel; 521 PROC_LOCK(p); 522 mtx_lock(&psp->ps_mtx); 523 } 524 525 /* 526 * System call to cleanup state after a signal 527 * has been taken. Reset signal mask and 528 * stack state from context left by sendsig (above). 529 * Return to previous pc and psl as specified by 530 * context left by sendsig. Check carefully to 531 * make sure that the user has not modified the 532 * psl to gain improper privileges or to cause 533 * a machine fault. 534 */ 535 int 536 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 537 { 538 struct proc *p = td->td_proc; 539 struct l_sigframe frame; 540 struct trapframe *regs; 541 l_sigset_t lmask; 542 int eflags, i; 543 ksiginfo_t ksi; 544 545 regs = td->td_frame; 546 547 #ifdef DEBUG 548 if (ldebug(sigreturn)) 549 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 550 #endif 551 /* 552 * The trampoline code hands us the sigframe. 553 * It is unsafe to keep track of it ourselves, in the event that a 554 * program jumps out of a signal handler. 555 */ 556 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 557 return (EFAULT); 558 559 /* 560 * Check for security violations. 561 */ 562 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 563 eflags = frame.sf_sc.sc_eflags; 564 /* 565 * XXX do allow users to change the privileged flag PSL_RF. The 566 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 567 * sometimes set it there too. tf_eflags is kept in the signal 568 * context during signal handling and there is no other place 569 * to remember it, so the PSL_RF bit may be corrupted by the 570 * signal handler without us knowing. 
Corruption of the PSL_RF 571 * bit at worst causes one more or one less debugger trap, so 572 * allowing it is fairly harmless. 573 */ 574 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 575 return(EINVAL); 576 577 /* 578 * Don't allow users to load a valid privileged %cs. Let the 579 * hardware check for invalid selectors, excess privilege in 580 * other selectors, invalid %eip's and invalid %esp's. 581 */ 582 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 583 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 584 ksiginfo_init_trap(&ksi); 585 ksi.ksi_signo = SIGBUS; 586 ksi.ksi_code = BUS_OBJERR; 587 ksi.ksi_trapno = T_PROTFLT; 588 ksi.ksi_addr = (void *)regs->tf_rip; 589 trapsignal(td, &ksi); 590 return(EINVAL); 591 } 592 593 lmask.__bits[0] = frame.sf_sc.sc_mask; 594 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 595 lmask.__bits[i+1] = frame.sf_extramask[i]; 596 PROC_LOCK(p); 597 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 598 SIG_CANTMASK(td->td_sigmask); 599 signotify(td); 600 PROC_UNLOCK(p); 601 602 /* 603 * Restore signal context. 604 */ 605 /* Selectors were restored by the trampoline. */ 606 regs->tf_rdi = frame.sf_sc.sc_edi; 607 regs->tf_rsi = frame.sf_sc.sc_esi; 608 regs->tf_rbp = frame.sf_sc.sc_ebp; 609 regs->tf_rbx = frame.sf_sc.sc_ebx; 610 regs->tf_rdx = frame.sf_sc.sc_edx; 611 regs->tf_rcx = frame.sf_sc.sc_ecx; 612 regs->tf_rax = frame.sf_sc.sc_eax; 613 regs->tf_rip = frame.sf_sc.sc_eip; 614 regs->tf_cs = frame.sf_sc.sc_cs; 615 regs->tf_rflags = eflags; 616 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 617 regs->tf_ss = frame.sf_sc.sc_ss; 618 619 return (EJUSTRETURN); 620 } 621 622 /* 623 * System call to cleanup state after a signal 624 * has been taken. Reset signal mask and 625 * stack state from context left by rt_sendsig (above). 626 * Return to previous pc and psl as specified by 627 * context left by sendsig. 
Check carefully to 628 * make sure that the user has not modified the 629 * psl to gain improper privileges or to cause 630 * a machine fault. 631 */ 632 int 633 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 634 { 635 struct proc *p = td->td_proc; 636 struct l_ucontext uc; 637 struct l_sigcontext *context; 638 l_stack_t *lss; 639 stack_t ss; 640 struct trapframe *regs; 641 int eflags; 642 ksiginfo_t ksi; 643 644 regs = td->td_frame; 645 646 #ifdef DEBUG 647 if (ldebug(rt_sigreturn)) 648 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 649 #endif 650 /* 651 * The trampoline code hands us the ucontext. 652 * It is unsafe to keep track of it ourselves, in the event that a 653 * program jumps out of a signal handler. 654 */ 655 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 656 return (EFAULT); 657 658 context = &uc.uc_mcontext; 659 660 /* 661 * Check for security violations. 662 */ 663 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 664 eflags = context->sc_eflags; 665 /* 666 * XXX do allow users to change the privileged flag PSL_RF. The 667 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 668 * sometimes set it there too. tf_eflags is kept in the signal 669 * context during signal handling and there is no other place 670 * to remember it, so the PSL_RF bit may be corrupted by the 671 * signal handler without us knowing. Corruption of the PSL_RF 672 * bit at worst causes one more or one less debugger trap, so 673 * allowing it is fairly harmless. 674 */ 675 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 676 return(EINVAL); 677 678 /* 679 * Don't allow users to load a valid privileged %cs. Let the 680 * hardware check for invalid selectors, excess privilege in 681 * other selectors, invalid %eip's and invalid %esp's. 
682 */ 683 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 684 if (!CS_SECURE(context->sc_cs)) { 685 ksiginfo_init_trap(&ksi); 686 ksi.ksi_signo = SIGBUS; 687 ksi.ksi_code = BUS_OBJERR; 688 ksi.ksi_trapno = T_PROTFLT; 689 ksi.ksi_addr = (void *)regs->tf_rip; 690 trapsignal(td, &ksi); 691 return(EINVAL); 692 } 693 694 PROC_LOCK(p); 695 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 696 SIG_CANTMASK(td->td_sigmask); 697 signotify(td); 698 PROC_UNLOCK(p); 699 700 /* 701 * Restore signal context 702 */ 703 /* Selectors were restored by the trampoline. */ 704 regs->tf_rdi = context->sc_edi; 705 regs->tf_rsi = context->sc_esi; 706 regs->tf_rbp = context->sc_ebp; 707 regs->tf_rbx = context->sc_ebx; 708 regs->tf_rdx = context->sc_edx; 709 regs->tf_rcx = context->sc_ecx; 710 regs->tf_rax = context->sc_eax; 711 regs->tf_rip = context->sc_eip; 712 regs->tf_cs = context->sc_cs; 713 regs->tf_rflags = eflags; 714 regs->tf_rsp = context->sc_esp_at_signal; 715 regs->tf_ss = context->sc_ss; 716 717 /* 718 * call sigaltstack & ignore results.. 719 */ 720 lss = &uc.uc_stack; 721 ss.ss_sp = PTRIN(lss->ss_sp); 722 ss.ss_size = lss->ss_size; 723 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 724 725 #ifdef DEBUG 726 if (ldebug(rt_sigreturn)) 727 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 728 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 729 #endif 730 (void)kern_sigaltstack(td, &ss, NULL); 731 732 return (EJUSTRETURN); 733 } 734 735 /* 736 * MPSAFE 737 */ 738 static void 739 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 740 { 741 args[0] = tf->tf_rbx; 742 args[1] = tf->tf_rcx; 743 args[2] = tf->tf_rdx; 744 args[3] = tf->tf_rsi; 745 args[4] = tf->tf_rdi; 746 args[5] = tf->tf_rbp; /* Unconfirmed */ 747 *params = NULL; /* no copyin */ 748 } 749 750 /* 751 * If a linux binary is exec'ing something, try this image activator 752 * first. 
We override standard shell script execution in order to 753 * be able to modify the interpreter path. We only do this if a linux 754 * binary is doing the exec, so we do not create an EXEC module for it. 755 */ 756 static int exec_linux_imgact_try(struct image_params *iparams); 757 758 static int 759 exec_linux_imgact_try(struct image_params *imgp) 760 { 761 const char *head = (const char *)imgp->image_header; 762 char *rpath; 763 int error = -1, len; 764 765 /* 766 * The interpreter for shell scripts run from a linux binary needs 767 * to be located in /compat/linux if possible in order to recursively 768 * maintain linux path emulation. 769 */ 770 if (((const short *)head)[0] == SHELLMAGIC) { 771 /* 772 * Run our normal shell image activator. If it succeeds attempt 773 * to use the alternate path for the interpreter. If an alternate 774 * path is found, use our stringspace to store it. 775 */ 776 if ((error = exec_shell_imgact(imgp)) == 0) { 777 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 778 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 779 if (rpath != NULL) { 780 len = strlen(rpath) + 1; 781 782 if (len <= MAXSHELLCMDLEN) { 783 memcpy(imgp->interpreter_name, rpath, len); 784 } 785 free(rpath, M_TEMP); 786 } 787 } 788 } 789 return(error); 790 } 791 792 /* 793 * Clear registers on exec 794 * XXX copied from ia32_signal.c. 
795 */ 796 static void 797 exec_linux_setregs(td, entry, stack, ps_strings) 798 struct thread *td; 799 u_long entry; 800 u_long stack; 801 u_long ps_strings; 802 { 803 struct trapframe *regs = td->td_frame; 804 struct pcb *pcb = td->td_pcb; 805 806 wrmsr(MSR_FSBASE, 0); 807 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 808 pcb->pcb_fsbase = 0; 809 pcb->pcb_gsbase = 0; 810 load_ds(_udatasel); 811 load_es(_udatasel); 812 load_fs(_udatasel); 813 load_gs(0); 814 pcb->pcb_ds = _udatasel; 815 pcb->pcb_es = _udatasel; 816 pcb->pcb_fs = _udatasel; 817 pcb->pcb_gs = 0; 818 819 bzero((char *)regs, sizeof(struct trapframe)); 820 regs->tf_rip = entry; 821 regs->tf_rsp = stack; 822 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 823 regs->tf_ss = _udatasel; 824 regs->tf_cs = _ucode32sel; 825 regs->tf_rbx = ps_strings; 826 load_cr0(rcr0() | CR0_MP | CR0_TS); 827 fpstate_drop(td); 828 829 /* Return via doreti so that we can change to a different %cs */ 830 pcb->pcb_flags |= PCB_FULLCTX; 831 td->td_retval[1] = 0; 832 } 833 834 /* 835 * XXX copied from ia32_sysvec.c. 836 */ 837 static register_t * 838 linux_copyout_strings(struct image_params *imgp) 839 { 840 int argc, envc; 841 u_int32_t *vectp; 842 char *stringp, *destp; 843 u_int32_t *stack_base; 844 struct linux32_ps_strings *arginfo; 845 int sigcodesz; 846 847 /* 848 * Calculate string base and vector table pointers. 849 * Also deal with signal trampoline code for this exec type. 850 */ 851 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 852 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode); 853 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE - 854 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 855 856 /* 857 * install sigcode 858 */ 859 if (sigcodesz) 860 copyout(imgp->proc->p_sysent->sv_sigcode, 861 ((caddr_t)arginfo - sigcodesz), szsigcode); 862 863 /* 864 * If we have a valid auxargs ptr, prepare some room 865 * on the stack. 
866 */ 867 if (imgp->auxargs) { 868 /* 869 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 870 * lower compatibility. 871 */ 872 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 873 : (AT_COUNT * 2); 874 /* 875 * The '+ 2' is for the null pointers at the end of each of 876 * the arg and env vector sets,and imgp->auxarg_size is room 877 * for argument of Runtime loader. 878 */ 879 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + 880 imgp->auxarg_size) * sizeof(u_int32_t)); 881 882 } else 883 /* 884 * The '+ 2' is for the null pointers at the end of each of 885 * the arg and env vector sets 886 */ 887 vectp = (u_int32_t *) 888 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); 889 890 /* 891 * vectp also becomes our initial stack base 892 */ 893 stack_base = vectp; 894 895 stringp = imgp->args->begin_argv; 896 argc = imgp->args->argc; 897 envc = imgp->args->envc; 898 /* 899 * Copy out strings - arguments and environment. 900 */ 901 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 902 903 /* 904 * Fill in "ps_strings" struct for ps, w, etc. 905 */ 906 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); 907 suword32(&arginfo->ps_nargvstr, argc); 908 909 /* 910 * Fill in argument portion of vector table. 911 */ 912 for (; argc > 0; --argc) { 913 suword32(vectp++, (u_int32_t)(intptr_t)destp); 914 while (*stringp++ != 0) 915 destp++; 916 destp++; 917 } 918 919 /* a null vector table pointer separates the argp's from the envp's */ 920 suword32(vectp++, 0); 921 922 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); 923 suword32(&arginfo->ps_nenvstr, envc); 924 925 /* 926 * Fill in environment portion of vector table. 
927 */ 928 for (; envc > 0; --envc) { 929 suword32(vectp++, (u_int32_t)(intptr_t)destp); 930 while (*stringp++ != 0) 931 destp++; 932 destp++; 933 } 934 935 /* end of vector table is a null pointer */ 936 suword32(vectp, 0); 937 938 return ((register_t *)stack_base); 939 } 940 941 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 942 "32-bit Linux emulation"); 943 944 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 945 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 946 &linux32_maxdsiz, 0, ""); 947 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 948 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 949 &linux32_maxssiz, 0, ""); 950 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 951 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 952 &linux32_maxvmem, 0, ""); 953 954 /* 955 * XXX copied from ia32_sysvec.c. 956 */ 957 static void 958 linux32_fixlimits(struct proc *p) 959 { 960 struct plimit *oldlim, *newlim; 961 962 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && 963 linux32_maxvmem == 0) 964 return; 965 newlim = lim_alloc(); 966 PROC_LOCK(p); 967 oldlim = p->p_limit; 968 lim_copy(newlim, oldlim); 969 if (linux32_maxdsiz != 0) { 970 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) 971 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; 972 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) 973 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; 974 } 975 if (linux32_maxssiz != 0) { 976 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) 977 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; 978 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) 979 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; 980 } 981 if (linux32_maxvmem != 0) { 982 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) 983 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; 984 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) 985 
newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; 986 } 987 p->p_limit = newlim; 988 PROC_UNLOCK(p); 989 lim_free(oldlim); 990 } 991 992 struct sysentvec elf_linux_sysvec = { 993 LINUX_SYS_MAXSYSCALL, 994 linux_sysent, 995 0xff, 996 LINUX_SIGTBLSZ, 997 bsd_to_linux_signal, 998 ELAST + 1, 999 bsd_to_linux_errno, 1000 translate_traps, 1001 elf_linux_fixup, 1002 linux_sendsig, 1003 linux_sigcode, 1004 &linux_szsigcode, 1005 linux_prepsyscall, 1006 "Linux ELF32", 1007 elf32_coredump, 1008 exec_linux_imgact_try, 1009 LINUX_MINSIGSTKSZ, 1010 PAGE_SIZE, 1011 VM_MIN_ADDRESS, 1012 LINUX32_USRSTACK, 1013 LINUX32_USRSTACK, 1014 LINUX32_PS_STRINGS, 1015 VM_PROT_ALL, 1016 linux_copyout_strings, 1017 exec_linux_setregs, 1018 linux32_fixlimits 1019 }; 1020 1021 static Elf32_Brandinfo linux_brand = { 1022 ELFOSABI_LINUX, 1023 EM_386, 1024 "Linux", 1025 "/compat/linux", 1026 "/lib/ld-linux.so.1", 1027 &elf_linux_sysvec, 1028 NULL, 1029 }; 1030 1031 static Elf32_Brandinfo linux_glibc2brand = { 1032 ELFOSABI_LINUX, 1033 EM_386, 1034 "Linux", 1035 "/compat/linux", 1036 "/lib/ld-linux.so.2", 1037 &elf_linux_sysvec, 1038 NULL, 1039 }; 1040 1041 Elf32_Brandinfo *linux_brandlist[] = { 1042 &linux_brand, 1043 &linux_glibc2brand, 1044 NULL 1045 }; 1046 1047 static int 1048 linux_elf_modevent(module_t mod, int type, void *data) 1049 { 1050 Elf32_Brandinfo **brandinfo; 1051 int error; 1052 struct linux_ioctl_handler **lihp; 1053 1054 error = 0; 1055 1056 switch(type) { 1057 case MOD_LOAD: 1058 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1059 ++brandinfo) 1060 if (elf32_insert_brand_entry(*brandinfo) < 0) 1061 error = EINVAL; 1062 if (error == 0) { 1063 SET_FOREACH(lihp, linux_ioctl_handler_set) 1064 linux_ioctl_register_handler(*lihp); 1065 if (bootverbose) 1066 printf("Linux ELF exec handler installed\n"); 1067 } else 1068 printf("cannot insert Linux ELF brand handler\n"); 1069 break; 1070 case MOD_UNLOAD: 1071 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 
1072 ++brandinfo) 1073 if (elf32_brand_inuse(*brandinfo)) 1074 error = EBUSY; 1075 if (error == 0) { 1076 for (brandinfo = &linux_brandlist[0]; 1077 *brandinfo != NULL; ++brandinfo) 1078 if (elf32_remove_brand_entry(*brandinfo) < 0) 1079 error = EINVAL; 1080 } 1081 if (error == 0) { 1082 SET_FOREACH(lihp, linux_ioctl_handler_set) 1083 linux_ioctl_unregister_handler(*lihp); 1084 if (bootverbose) 1085 printf("Linux ELF exec handler removed\n"); 1086 linux_mib_destroy(); 1087 } else 1088 printf("Could not deinstall ELF interpreter entry\n"); 1089 break; 1090 default: 1091 break; 1092 } 1093 return error; 1094 } 1095 1096 static moduledata_t linux_elf_mod = { 1097 "linuxelf", 1098 linux_elf_modevent, 1099 0 1100 }; 1101 1102 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1103